# Source path: gomoku/LightZero/zoo/board_games/test_speed_win-rate_between_bots.py
# Hosting-page residue kept for provenance: zjowowen · "init space" · 079c32c · 21.6 kB
"""
Overview:
    Play games between different bots to measure their win rates and their decision speed.
Example:
    ``test_tictactoe_mcts_bot_vs_alphabeta_bot`` is a game between mcts_bot and alphabeta_bot in which
    mcts_bot makes the first move (i.e. by convention the bot named on the left moves first).
"""
import time
import numpy as np
from easydict import EasyDict
from zoo.board_games.gomoku.envs.gomoku_env import GomokuEnv
from zoo.board_games.mcts_bot import MCTSBot
from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv
# Base configuration handed to ``TicTacToeEnv`` (wrapped in an ``EasyDict``) by the tictactoe benchmarks below.
# The benchmarks overwrite ``bot_action_type`` in place before building their environments.
cfg_tictactoe = {
    'battle_mode': 'self_play_mode',
    'agent_vs_human': False,
    # One of {'v0', 'alpha_beta_pruning'}.
    'bot_action_type': 'v0',
    'prob_random_agent': 0,
    'prob_expert_agent': 0,
    'channel_last': True,
    'scale': True,
    'prob_random_action_in_bot': 0.0,
}
def test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        Play 10 tictactoe games between mcts_bot and the environment's built-in bot (``env.bot_action()`` with
        ``bot_action_type='v0'``), where the built-in rule bot is player 1 and takes the first move.
        The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move; it is
            passed to ``MCTSBot``.
    """
    cfg_tictactoe['bot_action_type'] = 'v0'
    # Lists to record the time required for each decision and the winner of each game.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment, set the board to a clean board and the start player to be player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)
        # Player 1 (player_index == 0) moves first.
        player_index = 0
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # The rule bot is player 1; its decision time belongs in ``bot_action_time_list``.
                start = time.perf_counter()
                action = env.bot_action()
                bot_action_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # mcts_bot is player 2; its decision time belongs in ``mcts_bot_time_list``.
                start = time.perf_counter()
                action = player.get_actions(state, player_index=player_index)
                mcts_bot_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        Play 10 tictactoe games between alphabeta_bot and rule_bot, where alphabeta_bot (player 2) takes
        the first move. The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): Passed to the ``MCTSBot`` constructor and echoed in the report; it is
            not passed to either of the two competing bots.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment with player 2 (index 1) as the start player.
        env.reset(start_player_index=1)
        state = env.board
        # NOTE(review): this MCTSBot is never queried in this match-up; it is kept only to preserve the
        # original setup -- safe to drop once its constructor is confirmed to have no side effects.
        player = MCTSBot(env, 'a', num_simulations)
        # Player 2 (player_index == 1) moves first.
        player_index = 1
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # rule_bot is player 1.
                start = time.perf_counter()
                action = env.rule_bot_v0()
                rule_bot_v0_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # alpha_beta_bot is player 2.
                start = time.perf_counter()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board once the game is over.
            if env.get_done_reward()[0]:
                print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # The variance label now names the statistic that is actually printed.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=50):
    """
    Overview:
        Play 10 tictactoe games between alphabeta_bot and mcts_bot, where alphabeta_bot (player 2) takes
        the first move. The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move; it is
            passed to ``MCTSBot``.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment with player 2 (index 1) as the start player.
        env.reset(start_player_index=1)
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)
        # Player 2 (player_index == 1) moves first.
        player_index = 1
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # mcts_bot is player 1.
                start = time.perf_counter()
                action = player.get_actions(state, player_index=player_index)
                mcts_bot_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # alpha_beta_bot is player 2.
                start = time.perf_counter()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            # Trace every move: the board after the move, then the move itself.
            print(state)
            print(action)
            # The final board is printed once more when the game is over.
            if env.get_done_reward()[0]:
                print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        Play 10 tictactoe games between rule_bot and alphabeta_bot, where rule_bot (player 1) takes the
        first move. The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): Passed to the ``MCTSBot`` constructor and echoed in the report; it is
            not passed to either of the two competing bots.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment, set the board to a clean board and the start player to be player 1.
        env.reset()
        state = env.board
        # NOTE(review): this MCTSBot is never queried in this match-up; it is kept only to preserve the
        # original setup -- safe to drop once its constructor is confirmed to have no side effects.
        player = MCTSBot(env, 'a', num_simulations)
        # Player 1 (player_index == 0) moves first.
        player_index = 0
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # rule_bot is player 1.
                start = time.perf_counter()
                action = env.rule_bot_v0()
                rule_bot_v0_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # alpha_beta_bot is player 2.
                start = time.perf_counter()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board once the game is over.
            if env.get_done_reward()[0]:
                print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # The variance label now names the statistic that is actually printed.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        Play 10 tictactoe games between mcts_bot and alphabeta_bot, where mcts_bot (player 1) takes the
        first move. The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move; it is
            passed to ``MCTSBot``.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment, set the board to a clean board and the start player to be player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)
        # Player 1 (player_index == 0) moves first.
        player_index = 0
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # mcts_bot is player 1; it is asked for its move with the "most_visit" action type.
                start = time.perf_counter()
                action = player.get_actions(state, player_index=player_index, best_action_type="most_visit")
                mcts_bot_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # alpha_beta_bot is player 2.
                start = time.perf_counter()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the result of the game.
            if env.get_done_reward()[0]:
                print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    # The variance label now names the statistic that is actually printed.
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
# Configuration handed to ``GomokuEnv`` (wrapped in an ``EasyDict``) by the gomoku benchmark below.
cfg_gomoku = {
    'board_size': 5,
    'battle_mode': 'self_play_mode',
    # One of {'v0', 'alpha_beta_pruning'}.
    'bot_action_type': 'v0',
    'agent_vs_human': False,
    'prob_random_agent': 0,
    'channel_last': True,
    'scale': True,
    'prob_random_action_in_bot': 0.0,
    'check_action_to_connect4_in_bot_v0': False,
}
def test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        Play 10 gomoku games between mcts_bot and the environment's built-in bot (``env.bot_action()``, with
        ``cfg_gomoku`` selecting ``bot_action_type='v0'``), where the built-in rule bot is player 1 and takes
        the first move. The duration of every decision and the winner of every game are collected and printed.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move; it is
            passed to ``MCTSBot``.
    """
    # Lists to record the time required for each decision and the winner of each game.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = GomokuEnv(EasyDict(cfg_gomoku))
        # Reset the environment, set the board to a clean board and the start player to be player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)
        # Player 1 (player_index == 0) moves first.
        player_index = 0
        # The two players take turns to make moves, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            if player_index == 0:
                # The rule bot is player 1; its decision time belongs in ``bot_action_time_list``.
                start = time.perf_counter()
                action = env.bot_action()
                bot_action_time_list.append(time.perf_counter() - start)
                player_index = 1
            else:
                # mcts_bot is player 2; its decision time belongs in ``mcts_bot_time_list``.
                start = time.perf_counter()
                action = player.get_actions(state, player_index=player_index)
                mcts_bot_time_list.append(time.perf_counter() - start)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the result of the game.
            if env.get_done_reward()[0]:
                print(state)

        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the variance and mean of decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
if __name__ == "__main__":
    # Benchmark menu: uncomment the match-ups you want to run.
    #
    # Win rate between alphabeta_bot and rule_bot_v0:
    #     test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot()
    #     test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot()
    #
    # Win rate between alphabeta_bot and mcts_bot:
    #     test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=2000)
    #     test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=2000)
    #
    # Win rate between mcts_bot and rule_bot_v0, other settings:
    #     test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=500)
    #     test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)
    #     test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)
    #
    # Currently enabled: mcts_bot against rule_bot_v0 on tictactoe with 50 simulations.
    test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50)