AnnaMats's picture
Second Push
05c9ac2
from unittest.mock import patch
import pytest
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.dummy_config import (
create_observation_specs_with_shapes,
ppo_dummy_config,
poca_dummy_config,
sac_dummy_config,
)
from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.trainer import TrainerFactory
@pytest.fixture
def ppo_config():
return RunOptions(behaviors={"test_brain": ppo_dummy_config()})
@pytest.fixture
def sac_config():
return RunOptions(behaviors={"test_brain": sac_dummy_config()})
@pytest.fixture
def poca_config():
return RunOptions(behaviors={"test_brain": poca_dummy_config()})
def test_ppo_trainer_update_normalization(ppo_config):
behavior_id_team0 = "test_brain?team=0"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
)
base_config = ppo_config.behaviors
output_path = "results_dir"
train_model = True
load_model = False
seed = 42
trainer_factory = TrainerFactory(
trainer_config=base_config,
output_path=output_path,
train_model=train_model,
load_model=load_model,
seed=seed,
param_manager=EnvironmentParameterManager(),
)
ppo_trainer = trainer_factory.generate(brain_name)
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
policy = ppo_trainer.create_policy(parsed_behavior_id0, mock_specs)
ppo_trainer.add_policy(parsed_behavior_id0, policy)
trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
ppo_trainer.subscribe_trajectory_queue(trajectory_queue0)
time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_specs=create_observation_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)
# mocking out update_normalization in both the policy and critic
with patch(
"mlagents.trainers.torch_entities.networks.ValueNetwork.update_normalization"
) as optimizer_update_normalization_mock, patch(
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization"
) as policy_update_normalization_mock:
ppo_trainer.advance()
optimizer_update_normalization_mock.assert_called_once()
policy_update_normalization_mock.assert_called_once()
def test_sac_trainer_update_normalization(sac_config):
behavior_id_team0 = "test_brain?team=0"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
)
base_config = sac_config.behaviors
output_path = "results_dir"
train_model = True
load_model = False
seed = 42
trainer_factory = TrainerFactory(
trainer_config=base_config,
output_path=output_path,
train_model=train_model,
load_model=load_model,
seed=seed,
param_manager=EnvironmentParameterManager(),
)
sac_trainer = trainer_factory.generate(brain_name)
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
policy = sac_trainer.create_policy(parsed_behavior_id0, mock_specs)
sac_trainer.add_policy(parsed_behavior_id0, policy)
trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
sac_trainer.subscribe_trajectory_queue(trajectory_queue0)
time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_specs=create_observation_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)
# mocking out update_normalization in both the policy and critic
with patch(
"mlagents.trainers.torch_entities.networks.ValueNetwork.update_normalization"
) as optimizer_update_normalization_mock, patch(
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization"
) as policy_update_normalization_mock:
sac_trainer.advance()
optimizer_update_normalization_mock.assert_called_once()
policy_update_normalization_mock.assert_called_once()
def test_poca_trainer_update_normalization(poca_config):
behavior_id_team0 = "test_brain?team=0"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
)
base_config = poca_config.behaviors
output_path = "results_dir"
train_model = True
load_model = False
seed = 42
trainer_factory = TrainerFactory(
trainer_config=base_config,
output_path=output_path,
train_model=train_model,
load_model=load_model,
seed=seed,
param_manager=EnvironmentParameterManager(),
)
poca_trainer = trainer_factory.generate(brain_name)
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
policy = poca_trainer.create_policy(parsed_behavior_id0, mock_specs)
poca_trainer.add_policy(parsed_behavior_id0, policy)
trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
poca_trainer.subscribe_trajectory_queue(trajectory_queue0)
time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_specs=create_observation_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)
# mocking out update_normalization in both the policy and critic
with patch(
"mlagents.trainers.poca.optimizer_torch.TorchPOCAOptimizer.POCAValueNetwork.update_normalization"
) as optimizer_update_normalization_mock, patch(
"mlagents.trainers.torch_entities.networks.SimpleActor.update_normalization"
) as policy_update_normalization_mock:
poca_trainer.advance()
optimizer_update_normalization_mock.assert_called_once()
policy_update_normalization_mock.assert_called_once()