# Source path: gomoku/LightZero/zoo/game_2048/entry/2048_bot_eval.py
# Hosting-page residue kept for reference: uploaded by zjowowen, commit "init space" (079c32c), file size 1.96 kB.
import numpy as np
from easydict import EasyDict
from rich import print
from zoo.game_2048.envs.expectimax_search_based_bot import expectimax_search
from zoo.game_2048.envs.game_2048_env import Game2048Env
# Game configuration handed to Game2048Env by the evaluation script below.
# Only the option lists quoted in the comments are taken from this file; the
# exact meaning of the remaining keys is defined by Game2048Env itself.
config = EasyDict({
    'env_name': "game_2048",
    # (str) The render mode. Options are 'None', 'state_realtime_mode',
    # 'image_realtime_mode' or 'image_savefile_mode'.
    # If None, then the game will not be rendered.
    'render_mode': 'image_realtime_mode',
    # Replay settings (presumably only relevant when frames are saved to
    # disk -- confirm against Game2048Env).
    'replay_format': 'gif',
    'replay_name_suffix': 'bot',
    'replay_path': None,
    'act_scale': True,
    'channel_last': True,
    # options=['raw_board', 'raw_encoded_board', 'dict_encoded_board']
    'obs_type': 'raw_board',
    # options=['raw', 'merged_tiles_plus_log_max_tile_num']
    'reward_type': 'raw',
    'reward_normalize': False,
    'reward_norm_scale': 100,
    'max_tile': 2 ** 16,
    'delay_reward_step': 0,
    'prob_random_agent': 0.0,
    'max_episode_steps': 10000,
    'is_collect': False,
    'ignore_legal_actions': True,
    'need_flatten': False,
    # Chance-tile settings: two candidate tile values, listed alongside a
    # matching pair of probabilities.
    'num_of_possible_chance_tile': 2,
    'possible_tiles': np.array([2, 4]),
    'tile_probabilities': np.array([0.9, 0.1]),
})
if __name__ == "__main__":
    # Play a single episode of 2048 with the expectimax bot, printing and
    # rendering every step until the environment reports termination.
    env = Game2048Env(config)
    board = env.reset()
    print('init board state: ')
    env.render()

    step = 0
    done = False
    while not done:
        print('=' * 40)
        # Alternative action sources noted by the original author:
        #   env.human_to_action()  -> about 10000 score
        #   env.random_action()    -> about 1000 score
        # The expectimax bot was annotated with "about 300000~70000 score";
        # NOTE(review): the lower bound is presumably a typo for 30000 -- confirm.
        # The search is fed the current observation cast to int64.
        action = expectimax_search(board.astype(np.int64))
        board, reward, done, info = env.step(action)
        step += 1
        print(f"step: {step}, action: {action}, reward: {reward}, raw_reward: {info['raw_reward']}")
        env.render(mode='human')

    # Reached only once the environment has signalled `done`.
    print('total_step_number: {}'.format(step))