|
import time |
|
|
|
import numpy as np |
|
import pytest |
|
from easydict import EasyDict |
|
|
|
from connect4_env import Connect4Env |
|
from zoo.board_games.mcts_bot import MCTSBot |
|
|
|
|
|
@pytest.mark.unittest
class TestConnect4Bot():
    """
    Overview:
        Benchmark-style tests for the Connect4 bots. Every test plays ten complete games between two bots,
        records how long each move decision takes and prints the timing statistics together with the
        winner of every game. The tests only print; they contain no assertions.
    """

    def setup_method(self) -> None:
        """
        Overview:
            Build the environment configuration ``self.cfg`` that every test uses to create its
            ``Connect4Env`` instances. No environment or bot is created here.
        """
        self.cfg = EasyDict(
            battle_mode='self_play_mode',
            battle_mode_in_simulation_env='self_play_mode',
            channel_last=True,
            scale=True,
            agent_vs_human=False,
            prob_random_agent=0,
            prob_expert_agent=0,
            bot_action_type='rule',
            screen_scaling=9,
            render_mode='image_savefile_mode',
            prob_random_action_in_bot=0,
        )

    # ``setup`` is the original nose-style hook name. pytest >= 8 no longer calls it, so the hook is
    # defined as ``setup_method`` and the old name is kept as an alias for any explicit caller.
    setup = setup_method

    def test_mcts_bot_vs_rule_bot(self, num_simulations: int = 200) -> None:
        """
        Overview:
            Play ten Connect4 games between an MCTS bot and the environment's built-in bot
            (``env.bot_action()``, with ``bot_action_type='rule'`` in ``self.cfg``). The MCTS bot makes
            the first move of every game. Per-move decision times and the winners are printed.
        Arguments:
            - num_simulations (:obj:`int`): The number of simulations the MCTS bot runs to pick a move.
        """
        mcts_bot_time_list = []
        bot_action_time_list = []
        winner = []

        for i in range(10):
            print('-' * 10 + str(i) + '-' * 10)

            # The environment the actual game is played in.
            env = Connect4Env(EasyDict(self.cfg))
            env.reset(replay_name_suffix=f'test{i}')
            state = env.board

            # The MCTS bot gets its own environment, built from a config copy with ``save_replay`` off.
            self.cfg_temp = EasyDict(self.cfg.copy())
            self.cfg_temp.save_replay = False
            env_mcts = Connect4Env(EasyDict(self.cfg_temp))
            player = MCTSBot(env_mcts, 'a', num_simulations)

            player_index = 0
            # The two players take turns to make moves, and the time required for each decision is recorded.
            while not env.get_done_reward()[0]:
                if player_index == 0:
                    t1 = time.perf_counter()
                    action = player.get_actions(state, player_index=player_index)
                    t2 = time.perf_counter()
                    mcts_bot_time_list.append(t2 - t1)
                    player_index = 1
                else:
                    t1 = time.perf_counter()
                    action = env.bot_action()
                    t2 = time.perf_counter()
                    bot_action_time_list.append(t2 - t1)
                    player_index = 0
                env.step(action)
                state = env.board

            winner.append(env.get_done_winner()[1])

        mcts_bot_mu = np.mean(mcts_bot_time_list)
        mcts_bot_var = np.var(mcts_bot_time_list)

        bot_action_mu = np.mean(bot_action_time_list)
        bot_action_var = np.var(bot_action_time_list)

        print('num_simulations={}\n'.format(num_simulations))
        print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))
        print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))
        print(
            'winner={}, draw={}, player1={}, player2={}\n'.format(
                winner, winner.count(-1), winner.count(1), winner.count(2)
            )
        )

    def test_mcts_bot_vs_mcts_bot(self, num_simulations_1: int = 50, num_simulations_2: int = 50) -> None:
        """
        Overview:
            Play ten Connect4 games between two MCTS bots, where the bot configured with
            ``num_simulations_1`` makes the first move of every game. Per-move decision times and the
            winners are printed.
        Arguments:
            - num_simulations_1 (:obj:`int`): The number of simulations player 1 runs to pick a move.
            - num_simulations_2 (:obj:`int`): The number of simulations player 2 runs to pick a move.
        """
        mcts_bot1_time_list = []
        mcts_bot2_time_list = []
        winner = []

        for i in range(10):
            print('-' * 10 + str(i) + '-' * 10)

            env = Connect4Env(EasyDict(self.cfg))
            env.reset()
            state = env.board

            # NOTE(review): unlike test_mcts_bot_vs_rule_bot, both bots are handed the live game ``env``
            # instead of a separate simulation environment -- confirm MCTSBot does not disturb its state.
            player1 = MCTSBot(env, 'a', num_simulations_1)
            player2 = MCTSBot(env, 'a', num_simulations_2)

            player_index = 0
            # The two players take turns to make moves, and the time required for each decision is recorded.
            while not env.get_done_reward()[0]:
                if player_index == 0:
                    t1 = time.perf_counter()
                    action = player1.get_actions(state, player_index=player_index)
                    t2 = time.perf_counter()
                    mcts_bot1_time_list.append(t2 - t1)
                    player_index = 1
                else:
                    t1 = time.perf_counter()
                    action = player2.get_actions(state, player_index=player_index)
                    t2 = time.perf_counter()
                    mcts_bot2_time_list.append(t2 - t1)
                    player_index = 0
                env.step(action)
                state = env.board

            winner.append(env.get_done_winner()[1])

        mcts_bot1_mu = np.mean(mcts_bot1_time_list)
        mcts_bot1_var = np.var(mcts_bot1_time_list)

        mcts_bot2_mu = np.mean(mcts_bot2_time_list)
        mcts_bot2_var = np.var(mcts_bot2_time_list)

        # Report the simulation budgets that were actually used rather than fixed numbers.
        print('num_simulations={}\n'.format(num_simulations_1))
        print('mcts_bot1_time_list={}\n'.format(mcts_bot1_time_list))
        print('mcts_bot1_mu={}, mcts_bot1_var={}\n'.format(mcts_bot1_mu, mcts_bot1_var))

        print('num_simulations={}\n'.format(num_simulations_2))
        print('mcts_bot2_time_list={}\n'.format(mcts_bot2_time_list))
        print('mcts_bot2_mu={}, mcts_bot2_var={}\n'.format(mcts_bot2_mu, mcts_bot2_var))

        print(
            'winner={}, draw={}, player1={}, player2={}\n'.format(
                winner, winner.count(-1), winner.count(1), winner.count(2)
            )
        )

    def test_rule_bot_vs_rule_bot(self) -> None:
        """
        Overview:
            Play ten Connect4 games in which both sides pick their moves with the environment's built-in
            bot (``env.bot_action()``). Per-move decision times of each side and the winners are printed.
        """
        bot_action_time_list1 = []
        bot_action_time_list2 = []
        winner = []

        for i in range(10):
            print('-' * 10 + str(i) + '-' * 10)

            env = Connect4Env(EasyDict(self.cfg))
            env.reset(replay_name_suffix=f'test{i}')

            player_index = 0
            # The two players take turns to make moves, and the time required for each decision is recorded.
            while not env.get_done_reward()[0]:
                if player_index == 0:
                    t1 = time.perf_counter()
                    action = env.bot_action()
                    t2 = time.perf_counter()
                    bot_action_time_list1.append(t2 - t1)
                    player_index = 1
                else:
                    t1 = time.perf_counter()
                    action = env.bot_action()
                    t2 = time.perf_counter()
                    bot_action_time_list2.append(t2 - t1)
                    player_index = 0
                env.step(action)

            winner.append(env.get_done_winner()[1])

        bot_action_mu1 = np.mean(bot_action_time_list1)
        bot_action_var1 = np.var(bot_action_time_list1)

        bot_action_mu2 = np.mean(bot_action_time_list2)
        bot_action_var2 = np.var(bot_action_time_list2)

        print('bot_action_mu1={}, bot_action_var1={}\n'.format(bot_action_mu1, bot_action_var1))
        print('bot_action_mu2={}, bot_action_var2={}\n'.format(bot_action_mu2, bot_action_var2))
        print(
            'winner={}, draw={}, player1={}, player2={}\n'.format(
                winner, winner.count(-1), winner.count(1), winner.count(2)
            )
        )
|
|