import yaml
import pytest

from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert
from mlagents.trainers.settings import RewardSignalType
from mlagents.trainers.ppo.trainer import PPOSettings, TRAINER_NAME as PPO_TRAINER_NAME
from mlagents.trainers.sac.trainer import SACSettings, TRAINER_NAME as SAC_TRAINER_NAME
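
# Tests for upgrade_config.py: convert_behaviors() rewrites an old-style
# per-brain trainer config into TrainerSettings objects, and convert() merges
# trainer, curriculum, and randomization files into the new single-file format.
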
BRAIN_NAME = "testbehavior"
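
# Values used in the per-behavior override sections below; the conversion tests
# assert that batch_size and hidden_units survive the upgrade intact.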
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500
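
# Old-format PPO trainer config: a "default" section plus one per-behavior override.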
PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    beta_schedule: constant
    epsilon_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
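
# Old-format SAC trainer config for the same behavior.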
SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
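
# Old-style curriculum file covering two behaviors.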
CURRICULUM = """
BigWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 200
    signal_smoothing: true
    parameters:
        big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
        big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 100
    signal_smoothing: true
    parameters:
        small_wall_height: [1.5, 2.0, 2.5, 4.0]
"""
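
# Old-style parameter randomization ("sampler") file; note the dashed key names.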
RANDOMIZATION = """
resampling-interval: 5000
mass:
    sampler-type: uniform
    min_value: 0.5
    max_value: 10
gravity:
    sampler-type: uniform
    min_value: 7
    max_value: 12
scale:
    sampler-type: uniform
    min_value: 0.75
    max_value: 3
"""
@pytest.mark.parametrize("use_recurrent", [True, False]) |
|
@pytest.mark.parametrize("trainer_type", [PPO_TRAINER_NAME, SAC_TRAINER_NAME]) |
|
def test_convert_behaviors(trainer_type, use_recurrent): |
|
if trainer_type == PPO_TRAINER_NAME: |
|
trainer_config = PPO_CONFIG |
|
trainer_settings_type = PPOSettings |
|
else: |
|
trainer_config = SAC_CONFIG |
|
trainer_settings_type = SACSettings |
|
|
|
old_config = yaml.safe_load(trainer_config) |
|
old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent |
|
new_config = convert_behaviors(old_config) |
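
    # convert_behaviors should return the upgraded settings keyed by behavior name.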
    trainer_settings = new_config[BRAIN_NAME]
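
    # The trainer type, the hyperparameter class, and the per-behavior overrides
    # should all survive the upgrade.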
    assert trainer_settings.trainer_type == trainer_type
    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals


def test_convert():
    old_behaviors = yaml.safe_load(PPO_CONFIG)
    old_curriculum = yaml.safe_load(CURRICULUM)
    old_sampler = yaml.safe_load(RANDOMIZATION)
    config = convert(old_behaviors, old_curriculum, old_sampler)
    assert BRAIN_NAME in config["behaviors"]
    assert "big_wall_min_height" in config["environment_parameters"]
    curriculum = config["environment_parameters"]["big_wall_min_height"]["curriculum"]
    assert len(curriculum) == 4
    for i, expected_value in enumerate([0.0, 4.0, 6.0, 8.0]):
        assert curriculum[i][f"Lesson{i}"]["value"] == expected_value
    for i, threshold in enumerate([0.1, 0.3, 0.5]):
        criteria = curriculum[i][f"Lesson{i}"]["completion_criteria"]
        assert criteria["threshold"] == threshold
        assert criteria["behavior"] == "BigWallJump"
        assert criteria["signal_smoothing"]
        assert criteria["min_lesson_length"] == 200
        assert criteria["measure"] == "progress"
    assert "gravity" in config["environment_parameters"]
    gravity = config["environment_parameters"]["gravity"]
    assert gravity["sampler_type"] == "uniform"
    assert gravity["sampler_parameters"]["min_value"] == 7
    assert gravity["sampler_parameters"]["max_value"] == 12


def test_remove_nones():
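    # remove_nones should strip None-valued entries at every nesting level.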
    dict_with_nones = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    dict_without_nones = {"hello": {"hello2": 2}}
    output = remove_nones(dict_with_nones)
    assert output == dict_without_nones