import pytest
from easydict import EasyDict

from ding.config import compile_config_parallel
from ding.worker.coordinator.one_vs_one_parallel_commander import OneVsOneCommander


@pytest.fixture(scope='function')
def setup_1v1commander():
    """Build a ``OneVsOneCommander`` from a freshly compiled parallel config.

    Three config pieces (main, create, system) are assembled as ``EasyDict``
    objects, merged by ``compile_config_parallel``, and the ``'main'`` section
    of the result is handed to the commander constructor.

    Returns:
        OneVsOneCommander: the commander instance created from the config.
    """
    n_step = 1
    eval_every = 5

    # Experiment-level settings: env sizes, policy hyper-parameters, commander options.
    main_cfg = EasyDict({
        'exp_name': 'one_vs_one_test',
        'env': {
            'collector_env_num': 8,
            'collector_episode_num': 2,
            'evaluator_env_num': 5,
            'evaluator_episode_num': 1,
            'stop_value': 20,
        },
        'policy': {
            'cuda': False,
            'model': {
                'obs_shape': [4, 84, 84],
                'action_shape': 3,
                'encoder_kwargs': {'encoder_type': 'conv2d'},
            },
            'nstep': n_step,
            'learn': {
                'batch_size': 32,
                'learning_rate': 0.0001,
                'weight_decay': 0.,
                'algo': {
                    'target_update_freq': 500,
                    'discount_factor': 0.99,
                    'nstep': n_step,
                },
                'learner': {
                    'learner_num': 1,
                    'send_policy_freq': 1,
                },
            },
            'collect': {
                'traj_len': 15,
                'algo': {'nstep': n_step},
                'collector': {
                    'collector_num': 2,
                    'update_policy_second': 3,
                },
            },
            'other': {
                'eps': {
                    'type': 'linear',
                    'start': 1.,
                    'end': 0.005,
                    'decay': 1000000,
                },
                'commander': {
                    'collector_task_space': 2,
                    'learner_task_space': 1,
                    'eval_interval': eval_every,
                    'league': {'naive_sp_player': {'one_phase_step': 1000}},
                },
                'replay_buffer': {},
            },
        },
    })

    # Component registry: which implementation (and import path) backs each role.
    create_cfg = EasyDict({
        'env': {
            # A 1v1 commander should really use "competitive_rl", but that env is
            # hard to install, so "cartpole" stands in. The commander does not need
            # a real env; the entry is only kept so `compile_config_parallel` can run.
            'type': 'cartpole',
            'import_names': ['dizoo.classic_control.cartpole.envs.cartpole_env'],
        },
        'env_manager': {'type': 'base'},
        'policy': {'type': 'dqn_command'},
        'learner': {
            'type': 'base',
            'import_names': ['ding.worker.learner.base_learner'],
        },
        'collector': {
            'type': 'zergling',
            'import_names': ['ding.worker.collector.zergling_parallel_collector'],
        },
        'commander': {
            'type': 'one_vs_one',
            'import_names': ['ding.worker.coordinator.one_vs_one_parallel_commander'],
        },
        'comm_learner': {
            'type': 'flask_fs',
            'import_names': ['ding.worker.learner.comm.flask_fs_learner'],
        },
        'comm_collector': {
            'type': 'flask_fs',
            'import_names': ['ding.worker.collector.comm.flask_fs_collector'],
        },
        'league': {'type': 'one_vs_one'},
    })

    # System-level settings: storage paths and communication options.
    system_cfg = EasyDict({
        'coordinator': {},
        'path_data': './data',
        'path_policy': './policy',
        'communication_mode': 'auto',
        'learner_gpu_num': 1,
    })

    compiled = compile_config_parallel(main_cfg, create_cfg=create_cfg, system_cfg=system_cfg)
    return OneVsOneCommander(compiled['main'])