""" Overview: Implement games between different bots to test the win rates and the speed. Example: test_tictactoe_mcts_bot_vs_alphabeta_bot means a game between mcts_bot and alphabeta_bot where mcts_bot makes the first move (i.e. bots on the left make the first move). """ import time import numpy as np from easydict import EasyDict from zoo.board_games.gomoku.envs.gomoku_env import GomokuEnv from zoo.board_games.mcts_bot import MCTSBot from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv cfg_tictactoe = dict( battle_mode='self_play_mode', agent_vs_human=False, bot_action_type='v0', # {'v0', 'alpha_beta_pruning'} prob_random_agent=0, prob_expert_agent=0, channel_last=True, scale=True, prob_random_action_in_bot=0., ) def test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50): """ Overview: A tictactoe game between mcts_bot and rule_bot, where rule_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. """ cfg_tictactoe['bot_action_type'] = 'v0' # List to record the time required for each decision round and the winner. mcts_bot_time_list = [] bot_action_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset() state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 1 to move first. player_index = 0 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set rule_bot to be player 1. if player_index == 0: t1 = time.time() action = env.bot_action() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) mcts_bot_time_list.append(t2 - t1) player_index = 1 # Set mcts_bot to be player 2. else: t1 = time.time() # action = env.bot_action() action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) bot_action_time_list.append(t2 - t1) player_index = 0 env.step(action) state = env.board print(state) # Record the winner. winner.append(env.get_done_winner()[1]) # Calculate the variance and mean of decision times. mcts_bot_mu = np.mean(mcts_bot_time_list) mcts_bot_var = np.var(mcts_bot_time_list) bot_action_mu = np.mean(bot_action_time_list) bot_action_var = np.var(bot_action_time_list) # Print the information of the games. print('num_simulations={}\n'.format(num_simulations)) print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list)) print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var)) print('bot_action_time_list={}\n'.format(bot_action_time_list)) print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var)) print( 'winner={}, draw={}, player1={}, player2={}\n'.format( winner, winner.count(-1), winner.count(1), winner.count(2) ) ) def test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot(num_simulations=50): """ Overview: A tictactoe game between alphabeta_bot and rule_bot, where alphabeta_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. 
""" cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning' # List to record the time required for each decision round and the winner. alphabeta_pruning_time_list = [] rule_bot_v0_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset(start_player_index=1) state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 2 to move first. player_index = 1 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set rule_bot to be player 1. if player_index == 0: t1 = time.time() action = env.rule_bot_v0() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) # mcts_bot_time_list.append(t2 - t1) rule_bot_v0_time_list.append(t2 - t1) player_index = 1 # Set alpha_beta_bot to be player 2. else: t1 = time.time() action = env.bot_action_alpha_beta_pruning() # action = player.get_actions(state, player_index=player_index) t2 = time.time() # print("The time difference is :", t2-t1) alphabeta_pruning_time_list.append(t2 - t1) player_index = 0 env.step(action) state = env.board if env.get_done_reward()[0]: print(state) # Record the winner. winner.append(env.get_done_winner()[1]) # Calculate the variance and mean of decision times. alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list) alphabeta_pruning_var = np.var(alphabeta_pruning_time_list) rule_bot_v0_mu = np.mean(rule_bot_v0_time_list) rule_bot_v0_var = np.var(rule_bot_v0_time_list) # Print the information of the games. print('num_simulations={}\n'.format(num_simulations)) print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list)) print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var)) print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list)) print('rule_bot_v0_mu={}, bot_action_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var)) print( 'winner={}, draw={}, player1={}, player2={}\n'.format( winner, winner.count(-1), winner.count(1), winner.count(2) ) ) def test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=50): """ Overview: A tictactoe game between alphabeta_bot and mcts_bot, where mcts_bot take the first move. Arguments: - num_simulations (:obj:`int`): The number of the simulations required to find the best move. """ cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning' # List to record the time required for each decision round and the winner. alphabeta_pruning_time_list = [] mcts_bot_time_list = [] winner = [] # Repeat the game for 10 rounds. for i in range(10): print('-' * 10 + str(i) + '-' * 10) # Initialize the game, where there are two players: player 1 and player 2. env = TicTacToeEnv(EasyDict(cfg_tictactoe)) # Reset the environment, set the board to a clean board and the start player to be player 1. env.reset(start_player_index=1) state = env.board player = MCTSBot(env, 'a', num_simulations) # player_index = 0, player = 1 # Set player 2 to move first. player_index = 1 while not env.get_done_reward()[0]: """ Overview: The two players take turns to make moves, and the time required for each decision is recorded. """ # Set mcts_bot to be player 1. 
            if player_index == 0:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            print(state)
            print(action)

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


def test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between rule_bot and alphabeta_bot, where rule_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.rule_bot_v0()
                t2 = time.time()
                rule_bot_v0_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


def test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and alphabeta_bot, where mcts_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision and the winner of each game.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # mcts_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index, best_action_type="most_visit")
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)

    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))

    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


cfg_gomoku = dict(
    board_size=5,
    battle_mode='self_play_mode',
    bot_action_type='v0',  # options: {'v0', 'alpha_beta_pruning'}
    agent_vs_human=False,
    prob_random_agent=0,
    channel_last=True,
    scale=True,
    prob_random_action_in_bot=0.,
    check_action_to_connect4_in_bot_v0=False,
)


def test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A gomoku game between mcts_bot and rule_bot, where rule_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used to find the best move.
    """
    # Lists to record the time required for each decision and the winner of each game.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []

    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = GomokuEnv(EasyDict(cfg_gomoku))
        # Reset the environment: clear the board and set the start player to player 1.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 to move first.
        player_index = 0
        # The two players take turns to move, and the time required for each decision is recorded.
        while not env.get_done_reward()[0]:
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            # mcts_bot plays as player 2.
            else:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board

        # Print the final board of the game.
        if env.get_done_reward()[0]:
            print(state)
        # Record the winner.
        winner.append(env.get_done_winner()[1])

    # Calculate the mean and variance of the decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)

    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)

    # Print the statistics of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))

    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))

    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )


if __name__ == '__main__':
    # ==============================================================
    # test win rate between alphabeta_bot and rule_bot_v0
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot()
    # test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot()

    # ==============================================================
    # test win rate between alphabeta_bot and mcts_bot
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=2000)
    # test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=2000)

    # ==============================================================
    # test win rate between mcts_bot and rule_bot_v0
    # ==============================================================
    test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50)
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=500)
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)

    # test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)
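

# --------------------------------------------------------------------------------------
# Note: every test above ends with the same statistics block (per-bot decision-time mean
# and variance plus a winner summary). Below is a minimal sketch of how that block could
# be factored into one shared helper. ``summarize_match`` is a hypothetical name introduced
# here for illustration only; it is not part of the environments or bots and is not called
# by the tests above.
# --------------------------------------------------------------------------------------
def summarize_match(num_simulations, time_lists, winner):
    """
    Overview:
        Print per-bot decision-time statistics and the winner summary of a match.
    Arguments:
        - num_simulations (:obj:`int`): The number of simulations used by mcts_bot.
        - time_lists (:obj:`dict`): Mapping from a bot name to its list of per-move decision times.
        - winner (:obj:`list`): The winner of each game (-1 means a draw, 1 means player 1, 2 means player 2).
    """
    print('num_simulations={}\n'.format(num_simulations))
    for bot_name, time_list in time_lists.items():
        print('{}_time_list={}\n'.format(bot_name, time_list))
        print('{}_mu={}, {}_var={}\n'.format(bot_name, np.mean(time_list), bot_name, np.var(time_list)))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )

# Example usage (equivalent to the statistics block in test_tictactoe_mcts_bot_vs_rule_bot_v0_bot):
# summarize_match(50, {'mcts_bot': mcts_bot_time_list, 'bot_action': bot_action_time_list}, winner)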