|
from unittest.mock import patch |
|
|
|
import pytest |
|
|
|
from mlagents.trainers.agent_processor import AgentManagerQueue |
|
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers |
|
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager |
|
from mlagents.trainers.settings import RunOptions |
|
from mlagents.trainers.tests import mock_brain as mb |
|
from mlagents.trainers.tests.dummy_config import ( |
|
create_observation_specs_with_shapes, |
|
ppo_dummy_config, |
|
poca_dummy_config, |
|
sac_dummy_config, |
|
) |
|
from mlagents.trainers.tests.mock_brain import make_fake_trajectory |
|
from mlagents.trainers.trainer import TrainerFactory |
|
|
|
|
|
@pytest.fixture |
|
def ppo_config(): |
|
return RunOptions(behaviors={"test_brain": ppo_dummy_config()}) |
|
|
|
|
|
@pytest.fixture |
|
def sac_config(): |
|
return RunOptions(behaviors={"test_brain": sac_dummy_config()}) |
|
|
|
|
|
@pytest.fixture |
|
def poca_config(): |
|
return RunOptions(behaviors={"test_brain": poca_dummy_config()}) |
|
|
|
|
|
def test_ppo_trainer_update_normalization(ppo_config): |
|
behavior_id_team0 = "test_brain?team=0" |
|
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name |
|
mock_specs = mb.setup_test_behavior_specs( |
|
True, False, vector_action_space=[2], vector_obs_space=1 |
|
) |
|
base_config = ppo_config.behaviors |
|
output_path = "results_dir" |
|
train_model = True |
|
load_model = False |
|
seed = 42 |
|
trainer_factory = TrainerFactory( |
|
trainer_config=base_config, |
|
output_path=output_path, |
|
train_model=train_model, |
|
load_model=load_model, |
|
seed=seed, |
|
param_manager=EnvironmentParameterManager(), |
|
) |
|
ppo_trainer = trainer_factory.generate(brain_name) |
|
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0) |
|
policy = ppo_trainer.create_policy(parsed_behavior_id0, mock_specs) |
|
ppo_trainer.add_policy(parsed_behavior_id0, policy) |
|
trajectory_queue0 = AgentManagerQueue(behavior_id_team0) |
|
ppo_trainer.subscribe_trajectory_queue(trajectory_queue0) |
|
time_horizon = 15 |
|
trajectory = make_fake_trajectory( |
|
length=time_horizon, |
|
max_step_complete=True, |
|
observation_specs=create_observation_specs_with_shapes([(1,)]), |
|
action_spec=mock_specs.action_spec, |
|
) |
|
trajectory_queue0.put(trajectory) |
|
|
|
with patch( |
|
"mlagents.trainers.torch_entities.networks.ValueNetwork.update_normalization" |
|
) as optimizer_update_normalization_mock, patch( |
|
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization" |
|
) as policy_update_normalization_mock: |
|
ppo_trainer.advance() |
|
optimizer_update_normalization_mock.assert_called_once() |
|
policy_update_normalization_mock.assert_called_once() |
|
|
|
|
|
def test_sac_trainer_update_normalization(sac_config): |
|
behavior_id_team0 = "test_brain?team=0" |
|
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name |
|
mock_specs = mb.setup_test_behavior_specs( |
|
True, False, vector_action_space=[2], vector_obs_space=1 |
|
) |
|
base_config = sac_config.behaviors |
|
output_path = "results_dir" |
|
train_model = True |
|
load_model = False |
|
seed = 42 |
|
trainer_factory = TrainerFactory( |
|
trainer_config=base_config, |
|
output_path=output_path, |
|
train_model=train_model, |
|
load_model=load_model, |
|
seed=seed, |
|
param_manager=EnvironmentParameterManager(), |
|
) |
|
sac_trainer = trainer_factory.generate(brain_name) |
|
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0) |
|
policy = sac_trainer.create_policy(parsed_behavior_id0, mock_specs) |
|
sac_trainer.add_policy(parsed_behavior_id0, policy) |
|
trajectory_queue0 = AgentManagerQueue(behavior_id_team0) |
|
sac_trainer.subscribe_trajectory_queue(trajectory_queue0) |
|
time_horizon = 15 |
|
trajectory = make_fake_trajectory( |
|
length=time_horizon, |
|
max_step_complete=True, |
|
observation_specs=create_observation_specs_with_shapes([(1,)]), |
|
action_spec=mock_specs.action_spec, |
|
) |
|
trajectory_queue0.put(trajectory) |
|
|
|
with patch( |
|
"mlagents.trainers.torch_entities.networks.ValueNetwork.update_normalization" |
|
) as optimizer_update_normalization_mock, patch( |
|
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization" |
|
) as policy_update_normalization_mock: |
|
sac_trainer.advance() |
|
optimizer_update_normalization_mock.assert_called_once() |
|
policy_update_normalization_mock.assert_called_once() |
|
|
|
|
|
def test_poca_trainer_update_normalization(poca_config): |
|
behavior_id_team0 = "test_brain?team=0" |
|
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name |
|
mock_specs = mb.setup_test_behavior_specs( |
|
True, False, vector_action_space=[2], vector_obs_space=1 |
|
) |
|
base_config = poca_config.behaviors |
|
output_path = "results_dir" |
|
train_model = True |
|
load_model = False |
|
seed = 42 |
|
trainer_factory = TrainerFactory( |
|
trainer_config=base_config, |
|
output_path=output_path, |
|
train_model=train_model, |
|
load_model=load_model, |
|
seed=seed, |
|
param_manager=EnvironmentParameterManager(), |
|
) |
|
poca_trainer = trainer_factory.generate(brain_name) |
|
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0) |
|
policy = poca_trainer.create_policy(parsed_behavior_id0, mock_specs) |
|
poca_trainer.add_policy(parsed_behavior_id0, policy) |
|
trajectory_queue0 = AgentManagerQueue(behavior_id_team0) |
|
poca_trainer.subscribe_trajectory_queue(trajectory_queue0) |
|
time_horizon = 15 |
|
trajectory = make_fake_trajectory( |
|
length=time_horizon, |
|
max_step_complete=True, |
|
observation_specs=create_observation_specs_with_shapes([(1,)]), |
|
action_spec=mock_specs.action_spec, |
|
) |
|
trajectory_queue0.put(trajectory) |
|
|
|
with patch( |
|
"mlagents.trainers.poca.optimizer_torch.TorchPOCAOptimizer.POCAValueNetwork.update_normalization" |
|
) as optimizer_update_normalization_mock, patch( |
|
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization" |
|
) as policy_update_normalization_mock: |
|
poca_trainer.advance() |
|
optimizer_update_normalization_mock.assert_called_once() |
|
policy_update_normalization_mock.assert_called_once() |
|
|