File size: 2,800 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from zoo.board_games.tictactoe.config.tictactoe_muzero_bot_mode_config import main_config, create_config
from lzero.entry import eval_muzero
import numpy as np
if __name__ == "__main__":
    """
    Script entry: evaluate a MuZero model on the TicTacToe environment and print a summary.

    Variables:
        - model_path (:obj:`Optional[str]`): Path to the checkpoint file of the pretrained model. Prefer an
            absolute path. In LightZero this typically looks like ``exp_name/ckpt/ckpt_best.pth.tar``.
        - seeds (:obj:`List[int]`): Environment seeds to evaluate with.
        - num_episodes_each_seed (:obj:`int`): How many episodes are run for every seed.
        - total_test_episodes (:obj:`int`): ``num_episodes_each_seed * len(seeds)``; the denominator used for
            the win/draw/lose rates.
        - returns_mean_seeds (:obj:`np.ndarray`): Mean return reported by ``eval_muzero`` for each seed.
        - returns_seeds (:obj:`np.ndarray`): Returns reported by ``eval_muzero`` for each seed.
    """
    # ---- Evaluation settings -------------------------------------------------
    # model_path = "./ckpt/ckpt_best.pth.tar"
    model_path = None
    seeds = [0]
    num_episodes_each_seed = 1
    total_test_episodes = num_episodes_each_seed * len(seeds)

    # ---- Config overrides ----------------------------------------------------
    create_config.env_manager.type = 'base'
    env_cfg = main_config.env
    # Set to True to play against the agent yourself.
    env_cfg.agent_vs_human = False
    env_cfg.evaluator_env_num = 1
    env_cfg.n_evaluator_episode = 1
    # Save the replay as a gif under the given directory.
    env_cfg.save_replay_gif = True
    env_cfg.replay_path_gif = './video'

    # ---- Run one evaluation per seed -----------------------------------------
    # Each call yields a (mean_return, returns) pair for that seed.
    per_seed_results = [
        eval_muzero(
            [main_config, create_config],
            seed=current_seed,
            num_episodes_each_seed=num_episodes_each_seed,
            print_seed_details=True,
            model_path=model_path
        )
        for current_seed in seeds
    ]
    returns_mean_seeds = np.array([mean_return for mean_return, _ in per_seed_results])
    returns_seeds = np.array([episode_returns for _, episode_returns in per_seed_results])

    def _outcome_rate(outcome):
        # Share of all test episodes whose return equals ``outcome``.
        return len(np.where(returns_seeds == outcome)[0]) / total_test_episodes

    # A return of 1 is reported as a win, 0 as a draw and -1 as a loss.
    win_rate = _outcome_rate(1.)
    draw_rate = _outcome_rate(0.)
    lose_rate = _outcome_rate(-1.)

    # ---- Report --------------------------------------------------------------
    separator = "=" * 20
    print(separator)
    print(f"We evaluated a total of {len(seeds)} seeds. For each seed, we evaluated {num_episodes_each_seed} episode(s).")
    print(f"For seeds {seeds}, the mean returns are {returns_mean_seeds}, and the returns are {returns_seeds}.")
    print("Across all seeds, the mean reward is:", returns_mean_seeds.mean())
    print(
        f'win rate: {win_rate}, draw rate: {draw_rate}, lose rate: {lose_rate}'
    )
    print(separator)
|