""" Overview: Implement games between different bots to test the win rates and the speed. Example: test_tictactoe_mcts_bot_vs_alphabeta_bot means a game between mcts_bot and alphabeta_bot where mcts_bot makes the first move (i.e. bots on the left make the first move). """ import time import numpy as np from easydict import EasyDict from zoo.board_games.gomoku.envs.gomoku_env import GomokuEnv from zoo.board_games.mcts_bot import MCTSBot from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv cfg_tictactoe = dict( battle_mode='self_play_mode', agent_vs_human=False, bot_action_type='v0', # {'v0', 'alpha_beta_pruning'} prob_random_agent=0, prob_expert_agent=0, channel_last=True, scale=True, prob_random_action_in_bot=0., ) def test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50): """ Overview: A tictactoe game between mcts_bot and rule_bot, where rule_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. """ cfg_tictactoe['bot_action_type'] = 'v0' # List to record the time required for each decision round and the winner. mcts_bot_time_list = [] bot_action_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset() state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 1 to move first. player_index = 0 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set rule_bot to be player 1. if player_index == 0: t1 = time.time() action = env.bot_action() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) mcts_bot_time_list.append(t2 - t1) player_index = 1 # Set mcts_bot to be player 2. else: t1 = time.time() # action = env.bot_action() action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) bot_action_time_list.append(t2 - t1) player_index = 0 env.step(action) state = env.board print(state) # Record the winner. winner.append(env.get_done_winner()[1]) # Calculate the variance and mean of decision times. mcts_bot_mu = np.mean(mcts_bot_time_list) mcts_bot_var = np.var(mcts_bot_time_list) bot_action_mu = np.mean(bot_action_time_list) bot_action_var = np.var(bot_action_time_list) # Print the information of the games. print('num_simulations={}\n'.format(num_simulations)) print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list)) print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var)) print('bot_action_time_list={}\n'.format(bot_action_time_list)) print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var)) print( 'winner={}, draw={}, player1={}, player2={}\n'.format( winner, winner.count(-1), winner.count(1), winner.count(2) ) ) def test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot(num_simulations=50): """ Overview: A tictactoe game between alphabeta_bot and rule_bot, where alphabeta_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. 
""" cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning' # List to record the time required for each decision round and the winner. alphabeta_pruning_time_list = [] rule_bot_v0_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset(start_player_index=1) state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 2 to move first. player_index = 1 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set rule_bot to be player 1. if player_index == 0: t1 = time.time() action = env.rule_bot_v0() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) # mcts_bot_time_list.append(t2 - t1) rule_bot_v0_time_list.append(t2 - t1) player_index = 1 # Set alpha_beta_bot to be player 2. else: t1 = time.time() action = env.bot_action_alpha_beta_pruning() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) alphabeta_pruning_time_list.append(t2 - t1) player_index = 0 env.step(action) state = env.board if env.get_done_reward()[0]: print(state) # Record the winner. winner.append(env.get_done_winner()[1]) # Calculate the variance and mean of decision times. alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list) alphabeta_pruning_var = np.var(alphabeta_pruning_time_list) rule_bot_v0_mu = np.mean(rule_bot_v0_time_list) rule_bot_v0_var = np.var(rule_bot_v0_time_list) # Print the information of the games. print('num_simulations={}\n'.format(num_simulations)) print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list)) print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var)) print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list)) print('rule_bot_v0_mu={}, bot_action_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var)) print( 'winner={}, draw={}, player1={}, player2={}\n'.format( winner, winner.count(-1), winner.count(1), winner.count(2) ) ) def test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=50): """ Overview: A tictactoe game between alphabeta_bot and mcts_bot, where mcts_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. """ cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning' # List to record the time required for each decision round and the winner. alphabeta_pruning_time_list = [] mcts_bot_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset(start_player_index=1) state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 2 to move first. player_index = 1 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set mcts_bot to be player 1. 
            if player_index == 0:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            print(state)
            print(action)

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


def test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between rule_bot and alphabeta_bot, where rule_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.rule_bot_v0()
                t2 = time.time()
                rule_bot_v0_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


def test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and alphabeta_bot, where mcts_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # mcts_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index, best_action_type="most_visit")
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


cfg_gomoku = dict(
    board_size=5,
    battle_mode='self_play_mode',
    bot_action_type='v0',  # options: {'v0', 'alpha_beta_pruning'}
    agent_vs_human=False,
    prob_random_agent=0,
    channel_last=True,
    scale=True,
    prob_random_action_in_bot=0.,
    check_action_to_connect4_in_bot_v0=False,
)


def test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A gomoku game between mcts_bot and rule_bot, where rule_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    # Lists to record the time required for each decision and the winner of each game.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = GomokuEnv(EasyDict(cfg_gomoku))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            # mcts_bot plays as player 2.
            else:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


if __name__ == '__main__':
    # ==============================================================
    # test win rate between alphabeta_bot and rule_bot_v0
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot()
    # test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot()

    # ==============================================================
    # test win rate between alphabeta_bot and mcts_bot
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=2000)
    # test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=2000)

    # ==============================================================
    # test win rate between mcts_bot and rule_bot_v0
    # ==============================================================
    test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50)
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=500)
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)

    # test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)
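

# --------------------------------------------------------------------------------------
# Note: every test above ends with the same statistics block (per-bot decision-time mean
# and variance plus a winner summary). Below is a minimal sketch of how that block could
# be factored into one shared helper. ``summarize_match`` is a hypothetical name introduced
# here for illustration only; it is not part of the environments or bots and is not called
# by the tests above.
# --------------------------------------------------------------------------------------
def summarize_match(num_simulations, time_lists, winner):
    """
    Overview:
        Print per-bot decision-time statistics and the winner summary of a match.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used by mcts_bot.
        - time_lists (:obj:`dict`): Mapping from a bot name to its list of per-move decision times.
        - winner (:obj:`list`): The winner of each game (-1 means a draw, 1 means player 1, 2 means player 2).
    """
    print('num_simulations={}\n'.format(num_simulations))
    for bot_name, time_list in time_lists.items():
        print('{}_time_list={}\n'.format(bot_name, time_list))
        print('{}_mu={}, {}_var={}\n'.format(bot_name, np.mean(time_list), bot_name, np.var(time_list)))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )

# Example usage (equivalent to the statistics block in test_tictactoe_mcts_bot_vs_rule_bot_v0_bot):
# summarize_match(50, {'mcts_bot': mcts_bot_time_list, 'bot_action': bot_action_time_list}, winner)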