import attr
import cattr
import pickle
import pytest
import yaml

from typing import Dict, List, Optional

from mlagents.trainers.settings import (
    RunOptions,
    TrainerSettings,
    NetworkSettings,
    RewardSignalType,
    RewardSignalSettings,
    CuriositySettings,
    EnvironmentSettings,
    EnvironmentParameterSettings,
    ConstantSettings,
    UniformSettings,
    GaussianSettings,
    MultiRangeUniformSettings,
    deep_update_dict,
    strict_to_cls,
    ScheduleType,
)
from mlagents.trainers.ppo.trainer import PPOSettings, TRAINER_NAME as PPO_TRAINER_NAME
from mlagents.trainers.sac.trainer import SACSettings, TRAINER_NAME as SAC_TRAINER_NAME

from mlagents.trainers.exception import TrainerConfigError

TRAINER_SETTING_TYPES = {"ppo": PPOSettings, "sac": SACSettings}


def check_if_different(testobj1: object, testobj2: object) -> None:
    """
    Assert that two objects, and their mutable fields (dicts, lists, and nested
    attrs objects), recursively, are not the same instances.
    """
    assert testobj1 is not testobj2
    if attr.has(testobj1.__class__) and attr.has(testobj2.__class__):
        for key, val in attr.asdict(testobj1, recurse=False).items():
            if (
                isinstance(val, dict) or isinstance(val, list) or attr.has(val)
            ) and val != {}:
                # Recurse into the mutable field and check it as well.
                check_if_different(val, attr.asdict(testobj2, recurse=False)[key])
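# A hypothetical illustration of the failure mode this guards against: if an attrs
# field used a shared default instead of a factory, two fresh instances would share
# one mutable object, and e.g. RunOptions().behaviors is RunOptions().behaviors
# could evaluate to True instead of False.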


def check_dict_is_at_least(
    testdict1: Dict, testdict2: Dict, exceptions: Optional[List[str]] = None
) -> None:
    """
    Check that everything present in the 1st dict is present, with the same value,
    in the 2nd dict. Excludes keys that the 2nd dict has but that are not present
    in the hierarchy of the 1st dict. Used to compare an underspecified config dict
    structure (e.g. as would be provided by a user) with a complete one (e.g. as
    exported by RunOptions).
    """
    for key, val in testdict1.items():
        if exceptions is not None and key in exceptions:
            continue
        assert key in testdict2
        if isinstance(val, dict):
            check_dict_is_at_least(val, testdict2[key])
        elif isinstance(val, list):
            assert isinstance(testdict2[key], list)
            for _el0, _el1 in zip(val, testdict2[key]):
                if isinstance(_el0, dict):
                    check_dict_is_at_least(_el0, _el1)
                else:
                    assert val == testdict2[key]
        else:
            assert val == testdict2[key]
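# For example (hypothetical values):
#     check_dict_is_at_least({"a": 1}, {"a": 1, "b": 2})  # passes: subset matches
#     check_dict_is_at_least({"a": 2}, {"a": 1, "b": 2})  # fails: value differs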


def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses
    is a new instance (i.e. all factory methods are used properly).
    """
    check_if_different(RunOptions(), RunOptions())
    check_if_different(TrainerSettings(), TrainerSettings())


def test_no_configuration():
    """
    Verify that a new config will have a PPO trainer with extrinsic rewards.
    """
    blank_runoptions = RunOptions()
    blank_runoptions.behaviors.set_config_specified(False)
    assert isinstance(blank_runoptions.behaviors["test"], TrainerSettings)
    assert isinstance(blank_runoptions.behaviors["test"].hyperparameters, PPOSettings)
    assert (
        RewardSignalType.EXTRINSIC in blank_runoptions.behaviors["test"].reward_signals
    )


def test_strict_to_cls():
    """
    Test strict structuring method.
    """

    @attr.s(auto_attribs=True)
    class TestAttrsClass:
        field1: int = 0
        field2: str = "test"

    correct_dict = {"field1": 1, "field2": "test2"}
    assert strict_to_cls(correct_dict, TestAttrsClass) == TestAttrsClass(**correct_dict)

    # A field that is not defined on the class should raise.
    incorrect_dict = {"field3": 1, "field2": "test2"}
    with pytest.raises(TrainerConfigError):
        strict_to_cls(incorrect_dict, TestAttrsClass)

    # Non-dict input should also raise.
    with pytest.raises(TrainerConfigError):
        strict_to_cls("non_dict_input", TestAttrsClass)


def test_deep_update_dict():
    dict1 = {"a": 1, "b": 2, "c": {"d": 3}}
    dict2 = {"a": 2, "c": {"d": 4, "e": 5}}

    # Values from dict2 win, and nested dicts are merged recursively rather than
    # replaced wholesale.
    deep_update_dict(dict1, dict2)
    assert dict1 == {"a": 2, "b": 2, "c": {"d": 4, "e": 5}}


def test_trainersettings_structure():
    """
    Test structuring method for TrainerSettings.
    """
    trainersettings_dict = {
        "trainer_type": SAC_TRAINER_NAME,
        "hyperparameters": {"batch_size": 1024},
        "max_steps": 1.0,
        "reward_signals": {"curiosity": {"encoding_size": 64}},
    }
    trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings)

    assert isinstance(trainer_settings.hyperparameters, SACSettings)
    assert trainer_settings.trainer_type == SAC_TRAINER_NAME
    # max_steps was given as a float but should be converted to int.
    assert isinstance(trainer_settings.max_steps, int)
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals

    # Check that an invalid trainer type raises.
    with pytest.raises(TrainerConfigError):
        trainersettings_dict = {
            "trainer_type": "puppo",
            "hyperparameters": {"batch_size": 1024},
            "max_steps": 1.0,
        }
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)

    # Check that an invalid hyperparameter raises.
    with pytest.raises(TrainerConfigError):
        trainersettings_dict = {
            "trainer_type": PPO_TRAINER_NAME,
            "hyperparameters": {"notahyperparam": 1024},
            "max_steps": 1.0,
        }
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)

    # Check that non-dict input raises.
    with pytest.raises(TrainerConfigError):
        TrainerSettings.structure("notadict", TrainerSettings)

    # Check that specifying hyperparameters without a trainer_type raises.
    with pytest.raises(TrainerConfigError):
        trainersettings_dict = {"hyperparameters": {"batch_size": 1024}}
        TrainerSettings.structure(trainersettings_dict, TrainerSettings)
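# For reference, the first trainersettings_dict in the test above corresponds to a
# YAML config of roughly this shape (illustrative only; "MyBehavior" is a
# hypothetical behavior name):
#
#     MyBehavior:
#         trainer_type: sac
#         hyperparameters:
#             batch_size: 1024
#         max_steps: 1.0
#         reward_signals:
#             curiosity:
#                 encoding_size: 64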


def test_trainersettingsschedules_structure():
    """
    Test structuring method for TrainerSettings schedule types.
    """
    trainersettings_dict = {
        "trainer_type": PPO_TRAINER_NAME,
        "hyperparameters": {
            "learning_rate_schedule": "linear",
            "beta_schedule": "constant",
        },
    }
    trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings)
    assert isinstance(trainer_settings.hyperparameters, PPOSettings)
    assert (
        trainer_settings.hyperparameters.learning_rate_schedule == ScheduleType.LINEAR
    )
    assert trainer_settings.hyperparameters.beta_schedule == ScheduleType.CONSTANT
    # epsilon_schedule was not specified; verify it keeps its default (linear).
    assert trainer_settings.hyperparameters.epsilon_schedule == ScheduleType.LINEAR


def test_even_checkpoints_structure():
    """
    Test structuring for even checkpoints.
    """
    trainersettings_dict = {
        "trainer_type": PPO_TRAINER_NAME,
        "keep_checkpoints": 2,
        "even_checkpoints": True,
        "max_steps": 100.0,
    }

    trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings)
    assert isinstance(trainer_settings.hyperparameters, PPOSettings)
    # With even_checkpoints, the interval is max_steps / keep_checkpoints = 100 / 2.
    assert trainer_settings.checkpoint_interval == 50


def test_default_checkpoint_interval_structure():
    """
    Test that the default checkpoint interval is used when even_checkpoints
    is not specified.
    """
    trainersettings_dict = {
        "trainer_type": PPO_TRAINER_NAME,
        "keep_checkpoints": 2,
        "max_steps": 100.0,
    }

    trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings)
    assert isinstance(trainer_settings.hyperparameters, PPOSettings)
    assert trainer_settings.checkpoint_interval == 500000


def test_reward_signal_structure():
    """
    Tests the RewardSignalSettings structure method. This one is special because
    it takes in a Dict[RewardSignalType, RewardSignalSettings].
    """
    reward_signals_dict = {
        "extrinsic": {"strength": 1.0},
        "curiosity": {"strength": 1.0},
    }
    reward_signals = RewardSignalSettings.structure(
        reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
    )
    assert isinstance(reward_signals[RewardSignalType.EXTRINSIC], RewardSignalSettings)
    assert isinstance(reward_signals[RewardSignalType.CURIOSITY], CuriositySettings)

    # Check that an invalid reward signal type raises.
    reward_signals_dict = {"puppo": {"strength": 1.0}}
    with pytest.raises(ValueError):
        RewardSignalSettings.structure(
            reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
        )

    # Check that GAIL without a demo_path raises.
    reward_signals_dict = {"gail": {"strength": 1.0}}
    with pytest.raises(TypeError):
        RewardSignalSettings.structure(
            reward_signals_dict, Dict[RewardSignalType, RewardSignalSettings]
        )

    # Check that non-dict input raises.
    with pytest.raises(TrainerConfigError):
        RewardSignalSettings.structure(
            "notadict", Dict[RewardSignalType, RewardSignalSettings]
        )


def test_memory_settings_validation():
    # An odd memory_size is rejected.
    with pytest.raises(TrainerConfigError):
        NetworkSettings.MemorySettings(sequence_length=128, memory_size=63)

    # A non-positive memory_size is rejected.
    with pytest.raises(TrainerConfigError):
        NetworkSettings.MemorySettings(sequence_length=128, memory_size=0)
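# For reference, a sketch of a configuration that passes validation (assuming the
# constraints exercised above: memory_size must be positive and even):
#     NetworkSettings.MemorySettings(sequence_length=64, memory_size=128)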


def test_env_parameter_structure():
    """
    Tests the EnvironmentParameterSettings structure method and all validators.
    """
    env_params_dict = {
        "mass": {
            "sampler_type": "uniform",
            "sampler_parameters": {"min_value": 1.0, "max_value": 2.0},
        },
        "scale": {
            "sampler_type": "gaussian",
            "sampler_parameters": {"mean": 1.0, "st_dev": 2.0},
        },
        "length": {
            "sampler_type": "multirangeuniform",
            "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0, 4.0]]},
        },
        "gravity": 1,
        "wall_height": {
            "curriculum": [
                {
                    "name": "Lesson1",
                    "completion_criteria": {
                        "measure": "reward",
                        "behavior": "fake_behavior",
                        "threshold": 10,
                    },
                    "value": 1,
                },
                {"value": 4, "name": "Lesson2"},
            ]
        },
    }
    env_param_settings = EnvironmentParameterSettings.structure(
        env_params_dict, Dict[str, EnvironmentParameterSettings]
    )
    assert isinstance(env_param_settings["mass"].curriculum[0].value, UniformSettings)
    assert isinstance(env_param_settings["scale"].curriculum[0].value, GaussianSettings)
    assert isinstance(
        env_param_settings["length"].curriculum[0].value, MultiRangeUniformSettings
    )

    # Check that the samplers' string representations match their parameters.
    assert (
        str(env_param_settings["mass"].curriculum[0].value)
        == "Uniform sampler: min=1.0, max=2.0"
    )
    assert (
        str(env_param_settings["scale"].curriculum[0].value)
        == "Gaussian sampler: mean=1.0, stddev=2.0"
    )
    assert (
        str(env_param_settings["length"].curriculum[0].value)
        == "MultiRangeUniform sampler: intervals=[(1.0, 2.0), (3.0, 4.0)]"
    )
    assert str(env_param_settings["gravity"].curriculum[0].value) == "Float: value=1"

    # Plain numeric values are structured as ConstantSettings.
    assert isinstance(
        env_param_settings["wall_height"].curriculum[0].value, ConstantSettings
    )
    assert isinstance(
        env_param_settings["wall_height"].curriculum[1].value, ConstantSettings
    )

    # Check that an invalid sampler type raises.
    invalid_distribution_dict = {
        "mass": {
            "sampler_type": "beta",
            "sampler_parameters": {"alpha": 1.0, "beta": 2.0},
        }
    }
    with pytest.raises(ValueError):
        EnvironmentParameterSettings.structure(
            invalid_distribution_dict, Dict[str, EnvironmentParameterSettings]
        )

    # Check that a uniform sampler with min_value > max_value raises.
    invalid_distribution_dict = {
        "mass": {
            "sampler_type": "uniform",
            "sampler_parameters": {"min_value": 2.0, "max_value": 1.0},
        }
    }
    with pytest.raises(TrainerConfigError):
        EnvironmentParameterSettings.structure(
            invalid_distribution_dict, Dict[str, EnvironmentParameterSettings]
        )

    # Check that a multirangeuniform interval with min > max raises.
    invalid_distribution_dict = {
        "mass": {
            "sampler_type": "multirangeuniform",
            "sampler_parameters": {"intervals": [[2.0, 1.0]]},
        }
    }
    with pytest.raises(TrainerConfigError):
        EnvironmentParameterSettings.structure(
            invalid_distribution_dict, Dict[str, EnvironmentParameterSettings]
        )

    # Check that a malformed multirangeuniform interval raises.
    invalid_distribution_dict = {
        "mass": {
            "sampler_type": "multirangeuniform",
            "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0]]},
        }
    }
    with pytest.raises(TrainerConfigError):
        EnvironmentParameterSettings.structure(
            invalid_distribution_dict, Dict[str, EnvironmentParameterSettings]
        )

    # Check that non-dict input raises.
    with pytest.raises(TrainerConfigError):
        EnvironmentParameterSettings.structure(
            "notadict", Dict[str, EnvironmentParameterSettings]
        )

    # A curriculum lesson with measure "progress" must have a threshold between
    # 0 and 1, so this should raise.
    invalid_curriculum_dict = {
        "wall_height": {
            "curriculum": [
                {
                    "name": "Lesson1",
                    "completion_criteria": {
                        "measure": "progress",
                        "behavior": "fake_behavior",
                        "threshold": 10,
                    },
                    "value": 1,
                },
                {"value": 4, "name": "Lesson2"},
            ]
        }
    }
    with pytest.raises(TrainerConfigError):
        EnvironmentParameterSettings.structure(
            invalid_curriculum_dict, Dict[str, EnvironmentParameterSettings]
        )


@pytest.mark.parametrize("use_defaults", [True, False])
def test_exportable_settings(use_defaults):
    """
    Test that structuring and unstructuring a RunOptions object results in the same
    configuration representation.
    """
    # Enable as many features as possible in this YAML to hit the edge cases, and
    # use unusual values so that we can tell they were actually parsed.
    test_yaml = """
    behaviors:
        3DBall:
            trainer_type: sac
            hyperparameters:
                learning_rate: 0.0004
                learning_rate_schedule: constant
                batch_size: 64
                buffer_size: 200000
                buffer_init_steps: 100
                tau: 0.006
                steps_per_update: 10.0
                save_replay_buffer: true
                init_entcoef: 0.5
                reward_signal_steps_per_update: 10.0
            network_settings:
                deterministic: true
                normalize: false
                hidden_units: 256
                num_layers: 3
                vis_encode_type: nature_cnn
                memory:
                    memory_size: 1288
                    sequence_length: 12
            reward_signals:
                extrinsic:
                    gamma: 0.999
                    strength: 1.0
                curiosity:
                    gamma: 0.999
                    strength: 1.0
            keep_checkpoints: 5
            max_steps: 500000
            time_horizon: 1000
            summary_freq: 12000
            checkpoint_interval: 1
            threaded: true
    env_settings:
        env_path: test_env_path
        env_args:
            - test_env_args1
            - test_env_args2
        base_port: 12345
        num_envs: 8
        num_areas: 8
        seed: 12345
    engine_settings:
        width: 12345
        height: 12345
        quality_level: 12345
        time_scale: 12345
        target_frame_rate: 12345
        capture_frame_rate: 12345
        no_graphics: true
    checkpoint_settings:
        run_id: test_run_id
        initialize_from: test_directory
        load_model: false
        resume: true
        force: true
        train_model: false
        inference: false
        debug: true
    environment_parameters:
        big_wall_height:
            curriculum:
                - name: Lesson0
                  completion_criteria:
                      measure: progress
                      behavior: BigWallJump
                      signal_smoothing: true
                      min_lesson_length: 100
                      threshold: 0.1
                  value:
                      sampler_type: uniform
                      sampler_parameters:
                          min_value: 0.0
                          max_value: 4.0
                - name: Lesson1
                  completion_criteria:
                      measure: reward
                      behavior: BigWallJump
                      signal_smoothing: true
                      min_lesson_length: 100
                      threshold: 0.2
                  value:
                      sampler_type: gaussian
                      sampler_parameters:
                          mean: 4.0
                          st_dev: 7.0
                - name: Lesson2
                  completion_criteria:
                      measure: progress
                      behavior: BigWallJump
                      signal_smoothing: true
                      min_lesson_length: 20
                      threshold: 0.3
                  value:
                      sampler_type: multirangeuniform
                      sampler_parameters:
                          intervals: [[1.0, 2.0],[4.0, 5.0]]
                - name: Lesson3
                  value: 8.0
        small_wall_height: 42.0
        other_wall_height:
            sampler_type: multirangeuniform
            sampler_parameters:
                intervals: [[1.0, 2.0],[4.0, 5.0]]
    """
    if not use_defaults:
        loaded_yaml = yaml.safe_load(test_yaml)
        run_options = RunOptions.from_dict(loaded_yaml)
    else:
        run_options = RunOptions()
    dict_export = run_options.as_dict()

    if not use_defaults:  # Nothing to compare the defaults against.
        # environment_parameters is excluded because its exported form differs
        # from the shorthand used in the YAML above.
        check_dict_is_at_least(
            loaded_yaml, dict_export, exceptions=["environment_parameters"]
        )

    # Re-import the export and verify that it produces the same representation.
    run_options2 = RunOptions.from_dict(dict_export)
    second_export = run_options2.as_dict()

    check_dict_is_at_least(dict_export, second_export)
    # Check the same in the other direction.
    check_dict_is_at_least(second_export, dict_export)
    assert dict_export == second_export

    # After prioritize_resume_init, initialize_from should be cleared (resume
    # takes priority).
    run_options2.checkpoint_settings.prioritize_resume_init()
    assert run_options2.checkpoint_settings.initialize_from is None


def test_environment_settings():
    # Default args should be valid.
    EnvironmentSettings()

    # A single environment is fine without an env_path.
    EnvironmentSettings(num_envs=1)

    # Multiple simulation areas are fine without an env_path.
    EnvironmentSettings(num_areas=2)

    # Multiple environments are fine when an env_path is provided.
    EnvironmentSettings(num_envs=42, env_path="/foo/bar.exe")

    # Multiple environments without an env_path should raise.
    with pytest.raises(ValueError):
        EnvironmentSettings(num_envs=2)


def test_default_settings():
    # Make default settings, one nested and one not.
    default_settings = {
        "max_steps": 1,
        "network_settings": {"num_layers": 1000, "deterministic": True},
    }
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    run_options = RunOptions.from_dict(run_options_dict)

    # test2 is not specified; it should be built from the default settings
    # (as a new instance).
    default_settings_cls = cattr.structure(default_settings, TrainerSettings)
    check_if_different(default_settings_cls, run_options.behaviors["test2"])

    # test1 is specified; it should override the defaults, but only for the
    # fields that were given.
    test1_settings = run_options.behaviors["test1"]
    assert test1_settings.max_steps == 2
    assert test1_settings.network_settings.hidden_units == 2000
    assert test1_settings.network_settings.deterministic is True
    assert test1_settings.network_settings.num_layers == 1000

    # Restore the overridden fields to the default values; the two settings
    # objects should still be distinct instances.
    test1_settings.max_steps = 1
    test1_settings.network_settings.hidden_units = (
        default_settings_cls.network_settings.hidden_units
    )
    check_if_different(test1_settings, default_settings_cls)


def test_config_specified():
    # Test requiring all behavior names to be specified (or not).
    # Remove any pre-set defaults first.
    TrainerSettings.default_override = None
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    # Don't require all behavior names to be specified.
    ro.behaviors.set_config_specified(False)
    # An unspecified behavior should get default TrainerSettings.
    assert isinstance(ro.behaviors["test2"], TrainerSettings)

    # With no default_settings, requiring all behavior names should raise on an
    # unknown behavior.
    run_options_dict = {"behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    ro.behaviors.set_config_specified(True)
    with pytest.raises(TrainerConfigError):
        # The entry must be accessed for the lookup (and the error) to happen.
        print(ro.behaviors["test2"])

    # With default_settings provided, an unknown behavior is allowed even when
    # all behavior names are required.
    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    ro = RunOptions.from_dict(run_options_dict)
    ro.behaviors.set_config_specified(True)
    assert isinstance(ro.behaviors["test2"], TrainerSettings)


def test_pickle():
    # Make sure RunOptions is pickle-able.
    run_options = RunOptions()
    p = pickle.dumps(run_options)
    pickle.loads(p)