AnnaMats's picture
Second Push
05c9ac2
import yaml
import pytest
from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert
from mlagents.trainers.settings import RewardSignalType
from mlagents.trainers.ppo.trainer import PPOSettings, TRAINER_NAME as PPO_TRAINER_NAME
from mlagents.trainers.sac.trainer import SACSettings, TRAINER_NAME as SAC_TRAINER_NAME
BRAIN_NAME = "testbehavior"
# Check one per category
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500
PPO_CONFIG = f"""
default:
trainer: ppo
batch_size: 1024
beta: 5.0e-3
buffer_size: 10240
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
learning_rate_schedule: linear
beta_schedule: constant
epsilon_schedule: linear
max_steps: 5.0e5
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 10000
use_recurrent: false
vis_encode_type: simple
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
{BRAIN_NAME}:
trainer: ppo
batch_size: {BATCH_SIZE}
beta: 5.0e-3
buffer_size: 64
epsilon: 0.2
hidden_units: {HIDDEN_UNITS}
lambd: 0.95
learning_rate: 5.0e-3
max_steps: 2500
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: {SUMMARY_FREQ}
use_recurrent: false
reward_signals:
curiosity:
strength: 1.0
gamma: 0.99
encoding_size: 128
"""
SAC_CONFIG = f"""
default:
trainer: sac
batch_size: 128
buffer_size: 50000
buffer_init_steps: 0
hidden_units: 128
init_entcoef: 1.0
learning_rate: 3.0e-4
learning_rate_schedule: constant
max_steps: 5.0e5
memory_size: 256
normalize: false
num_update: 1
train_interval: 1
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 10000
tau: 0.005
use_recurrent: false
vis_encode_type: simple
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
{BRAIN_NAME}:
trainer: sac
batch_size: {BATCH_SIZE}
buffer_size: 64
buffer_init_steps: 100
hidden_units: {HIDDEN_UNITS}
init_entcoef: 0.01
learning_rate: 3.0e-4
max_steps: 1000
memory_size: 256
normalize: false
num_update: 1
train_interval: 1
num_layers: 1
time_horizon: 64
sequence_length: 64
summary_freq: {SUMMARY_FREQ}
tau: 0.005
use_recurrent: false
curiosity_enc_size: 128
demo_path: None
vis_encode_type: simple
reward_signals:
curiosity:
strength: 1.0
gamma: 0.99
encoding_size: 128
"""
CURRICULUM = """
BigWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 200
signal_smoothing: true
parameters:
big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
"""
RANDOMIZATION = """
resampling-interval: 5000
mass:
sampler-type: uniform
min_value: 0.5
max_value: 10
gravity:
sampler-type: uniform
min_value: 7
max_value: 12
scale:
sampler-type: uniform
min_value: 0.75
max_value: 3
"""
@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [PPO_TRAINER_NAME, SAC_TRAINER_NAME])
def test_convert_behaviors(trainer_type, use_recurrent):
if trainer_type == PPO_TRAINER_NAME:
trainer_config = PPO_CONFIG
trainer_settings_type = PPOSettings
else:
trainer_config = SAC_CONFIG
trainer_settings_type = SACSettings
old_config = yaml.safe_load(trainer_config)
old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent
new_config = convert_behaviors(old_config)
# Test that the new config can be converted to TrainerSettings w/o exceptions
trainer_settings = new_config[BRAIN_NAME]
# Test that the trainer_settings contains the settings for BRAIN_NAME and
# the defaults where specified
assert trainer_settings.trainer_type == trainer_type
assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals
def test_convert():
old_behaviors = yaml.safe_load(PPO_CONFIG)
old_curriculum = yaml.safe_load(CURRICULUM)
old_sampler = yaml.safe_load(RANDOMIZATION)
config = convert(old_behaviors, old_curriculum, old_sampler)
assert BRAIN_NAME in config["behaviors"]
assert "big_wall_min_height" in config["environment_parameters"]
curriculum = config["environment_parameters"]["big_wall_min_height"]["curriculum"]
assert len(curriculum) == 4
for i, expected_value in enumerate([0.0, 4.0, 6.0, 8.0]):
assert curriculum[i][f"Lesson{i}"]["value"] == expected_value
for i, threshold in enumerate([0.1, 0.3, 0.5]):
criteria = curriculum[i][f"Lesson{i}"]["completion_criteria"]
assert criteria["threshold"] == threshold
assert criteria["behavior"] == "BigWallJump"
assert criteria["signal_smoothing"]
assert criteria["min_lesson_length"] == 200
assert criteria["measure"] == "progress"
assert "gravity" in config["environment_parameters"]
gravity = config["environment_parameters"]["gravity"]
assert gravity["sampler_type"] == "uniform"
assert gravity["sampler_parameters"]["min_value"] == 7
assert gravity["sampler_parameters"]["max_value"] == 12
def test_remove_nones():
dict_with_nones = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
dict_without_nones = {"hello": {"hello2": 2}}
output = remove_nones(dict_with_nones)
assert output == dict_without_nones