|
""" |
|
Overview: |
|
Implement games between different bots to test the win rates and the speed. |
|
Example: |
|
test_tictactoe_mcts_bot_vs_alphabeta_bot means a game between mcts_bot and alphabeta_bot where |
|
mcts_bot makes the first move (i.e. bots on the left make the first move). |
|
""" |
|
import time |
|
|
|
import numpy as np |
|
from easydict import EasyDict |
|
|
|
from zoo.board_games.gomoku.envs.gomoku_env import GomokuEnv |
|
from zoo.board_games.mcts_bot import MCTSBot |
|
from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv |
|
|
|
# Configuration for the TicTacToe environment used by the bot-vs-bot tests below.
# NOTE: the tests mutate 'bot_action_type' in place before constructing each env.
cfg_tictactoe = dict(
    # Both players act in turn inside a single env instance.
    battle_mode='self_play_mode',
    agent_vs_human=False,
    # Built-in bot used by env.bot_action(); each test overrides this
    # ('v0' rule bot or 'alpha_beta_pruning').
    bot_action_type='v0',
    prob_random_agent=0,
    prob_expert_agent=0,
    channel_last=True,
    scale=True,
    # Probability that the built-in bot plays a random action instead of its policy action.
    prob_random_action_in_bot=0.,
)
|
|
|
|
|
def test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and rule_bot (v0), where rule_bot takes the first move.
        Plays 10 games, recording the wall-clock time of every decision of each bot
        and the winner of each game, then prints timing statistics and win counts.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'v0'

    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)

        # player_index == 0 is the rule bot (first mover), player_index == 1 is the MCTS bot.
        # The two players take turns to make moves, and the time required for each
        # decision is recorded in the list belonging to the bot that made it.
        player_index = 0
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # Rule bot's move: env.bot_action() dispatches to the 'v0' rule bot.
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                # BUGFIX: this time was previously appended to mcts_bot_time_list,
                # swapping the two bots' timing statistics in the printed report.
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            else:
                # MCTS bot's move.
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            print(state)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
def test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between alphabeta_bot and rule_bot (v0), where alphabeta_bot takes
        the first move. Plays 10 games, recording per-move decision times and the winner
        of each game, then prints timing statistics and win counts.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
          (Unused by the alpha-beta bot; kept for a signature consistent with the other tests.)
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'

    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # start_player_index=1 so that player_index 1 (the alpha-beta bot) moves first.
        env.reset(start_player_index=1)
        state = env.board
        # NOTE: the original code also constructed an unused MCTSBot here; removed.

        # The two players take turns to make moves, and the time required for
        # each decision is recorded.
        player_index = 1
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # Rule bot's move.
                t1 = time.time()
                action = env.rule_bot_v0()
                t2 = time.time()
                rule_bot_v0_time_list.append(t2 - t1)
                player_index = 1
            else:
                # Alpha-beta pruning bot's move.
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Only print the final board of each game.
            if env.get_done_reward()[0]:
                print(state)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # BUGFIX: label previously said 'bot_action_var' while printing rule_bot_v0_var.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
def test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between alphabeta_bot and mcts_bot, where alphabeta_bot takes the
        first move (start_player_index=1 and the initial player_index=1 select the alpha-beta
        branch first; the previous docstring wrongly said mcts_bot moved first).
        Plays 10 games, recording per-move decision times and the winner of each game.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'

    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # start_player_index=1 so that player_index 1 (the alpha-beta bot) moves first.
        env.reset(start_player_index=1)
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)

        # The two players take turns to make moves, and the time required for
        # each decision is recorded.
        player_index = 1
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # MCTS bot's move.
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            else:
                # Alpha-beta pruning bot's move.
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the board and chosen action after every move.
            # (The original additionally re-printed the final board; removed as redundant.)
            print(state)
            print(action)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
def test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between rule_bot (v0) and alphabeta_bot, where rule_bot takes the
        first move. Plays 10 games, recording per-move decision times and the winner
        of each game, then prints timing statistics and win counts.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
          (Unused by either bot in this matchup; kept for a signature consistent with the other tests.)
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'

    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Default start player: player_index 0 (the rule bot) moves first.
        env.reset()
        state = env.board
        # NOTE: the original code also constructed an unused MCTSBot here; removed.

        # The two players take turns to make moves, and the time required for
        # each decision is recorded.
        player_index = 0
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # Rule bot's move.
                t1 = time.time()
                action = env.rule_bot_v0()
                t2 = time.time()
                rule_bot_v0_time_list.append(t2 - t1)
                player_index = 1
            else:
                # Alpha-beta pruning bot's move.
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Only print the final board of each game.
            if env.get_done_reward()[0]:
                print(state)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # BUGFIX: label previously said 'bot_action_var' while printing rule_bot_v0_var.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
def test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and alphabeta_bot, where mcts_bot takes the first
        move. The MCTS bot selects its final action by visit count ('most_visit').
        Plays 10 games, recording per-move decision times and the winner of each game.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'

    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Default start player: player_index 0 (the MCTS bot) moves first.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)

        # The two players take turns to make moves, and the time required for
        # each decision is recorded.
        player_index = 0
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # MCTS bot's move; the final action is the child with the most visits.
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index, best_action_type="most_visit")
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            else:
                # Alpha-beta pruning bot's move.
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Only print the final board of each game.
            if env.get_done_reward()[0]:
                print(state)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    # BUGFIX: label previously said 'bot_action_var' while printing mcts_bot_var.
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
# Configuration for the Gomoku environment used by the bot-vs-bot test below.
cfg_gomoku = dict(
    # 5x5 board keeps the rule bot and MCTS bot fast enough for a benchmark run.
    board_size=5,
    # Both players act in turn inside a single env instance.
    battle_mode='self_play_mode',
    # Built-in bot used by env.bot_action().
    bot_action_type='v0',
    agent_vs_human=False,
    prob_random_agent=0,
    channel_last=True,
    scale=True,
    # Probability that the built-in bot plays a random action instead of its policy action.
    prob_random_action_in_bot=0.,
    # Skip the (expensive) connect-4 lookahead check inside the v0 rule bot.
    check_action_to_connect4_in_bot_v0=False,
)
|
|
|
|
|
def test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A gomoku game between mcts_bot and rule_bot (v0), where rule_bot takes the first move
        (the previous docstring wrongly said "tictactoe"). Plays 10 games, recording the
        wall-clock time of every decision of each bot and the winner of each game.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)

        env = GomokuEnv(EasyDict(cfg_gomoku))
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)

        # player_index == 0 is the rule bot (first mover), player_index == 1 is the MCTS bot.
        # The two players take turns to make moves, and the time required for each
        # decision is recorded in the list belonging to the bot that made it.
        player_index = 0
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # Rule bot's move: env.bot_action() dispatches to the 'v0' rule bot.
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                # BUGFIX: this time was previously appended to mcts_bot_time_list,
                # swapping the two bots' timing statistics in the printed report.
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            else:
                # MCTS bot's move.
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Only print the final board of each game.
            if env.get_done_reward()[0]:
                print(state)

        # get_done_winner()[1] is the winner id: -1 draw, 1 player1, 2 player2.
        winner.append(env.get_done_winner()[1])

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
|
|
|
|
|
if __name__ == '__main__':
    # Benchmark entry point: runs one matchup by default. Call the other
    # test_* functions defined above to benchmark the remaining pairings.
    test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50)
|
|
|
|
|
|
|
|
|
|