File size: 5,468 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from functools import lru_cache
from typing import Tuple, Union
import numpy as np
# Define expectimax search bot for 2048 env
def expectimax_search(grid: np.array, fast_search: bool = True) -> int:
"""
Overview:
Use Expectimax search algorithm to find the best action for 2048 env.
Adapted from https://github.com/xwjdsh/2048-ai/blob/master/ai/ai.go.
"""
# please refer to https://codemyroad.wordpress.com/2014/05/14/2048-ai-the-intelligent-bot/
model1 = np.array([[16, 15, 14, 13], [9, 10, 11, 12], [8, 7, 6, 5], [1, 2, 2, 4]])
model2 = np.array([[16, 15, 12, 4], [14, 13, 11, 3], [10, 9, 8, 2], [7, 6, 5, 1]])
model3 = np.array([[16, 15, 14, 4], [13, 12, 11, 3], [10, 9, 8, 2], [7, 6, 5, 1]])
# Use lru_cache decorator for caching, speeding up subsequent look-ups
@lru_cache(maxsize=512)
def get_model_score(value, i, j):
result = np.zeros(3 * 8)
for k, m in enumerate([model1, model2, model3]):
start = k * 8
result[start] += m[i, j] * value
# Scores of other 7 directions of the model
result[start + 1] += m[i, 3 - j] * value
result[start + 2] += m[j, i] * value
result[start + 3] += m[3 - j, i] * value
result[start + 4] += m[3 - i, 3 - j] * value
result[start + 5] += m[3 - i, j] * value
result[start + 6] += m[j, 3 - i] * value
result[start + 7] += m[3 - j, 3 - i] * value
return result
def get_score(grid: np.array) -> float:
# Calculate the score of the current layout
result = np.zeros(3 * 8)
for i in range(4):
for j in range(4):
if grid[i, j] != 0:
result += get_model_score(grid[i, j], i, j)
return result.max()
def expectation_search(grid: np.array, depth: int, chance_node: bool) -> Tuple[float, Union[int, None]]:
# Use Expectimax search algorithm to find the best action
# please refer to https://courses.cs.washington.edu/courses/cse473/11au/slides/cse473au11-adversarial-search.pdf
if depth == 0:
return get_score(grid), None
if chance_node:
cum_score = 0.
if fast_search:
choices = [[2, 0.9]]
else:
choices = zip([2, 4], [0.9, 0.1])
for value, prob in choices:
value, prob = 2, 0.9
for i in range(4):
for j in range(4):
if grid[i, j] == 0:
grid[i, j] = value
cum_score += prob * expectation_search(grid, depth - 1, False)[0]
grid[i, j] = 0
empty_count = np.sum(grid == 0)
cum_score /= empty_count
return cum_score, None
else:
best_score = 0
best_action = None
# 0, 1, 2, 3 mean top, right, bottom, left
for dire in [0, 1, 2, 3]:
new_grid, move_flag, _ = move(grid, dire)
if move_flag:
score = expectation_search(new_grid, depth - 1, True)[0]
if score > best_score:
best_score = score
best_action = dire
return best_score, best_action
# Select search depth based on the current maximum tile value
grid_max = grid.max()
if grid_max >= 2048:
depth = 6
elif grid_max >= 1024:
depth = 5
else:
depth = 4
# Call the expectation search algorithm and return the best action
_, best_action = expectation_search(grid, depth, False)
return best_action
# Define move function, implement move operation in 2048 game
def move(grid: np.array, action: int, game_score: int = 0) -> Tuple[np.array, bool, int]:
# execute action in 2048 game
# 0, 1, 2, 3 mean top, right, bottom, left
assert action in [0, 1, 2, 3], action
old_grid = grid
grid = np.copy(grid)
# rotate
if action == 0:
grid = np.rot90(grid)
elif action == 1:
grid = np.rot90(grid, k=2)
elif action == 2:
grid = np.rot90(grid, k=3)
# simple move
for i in range(4):
for j in range(3):
if grid[i, j] == 0:
grid[i, j] = grid[i, j + 1]
grid[i, j + 1] = 0
# merge
for i in range(4):
for j in range(3):
if grid[i, j] == grid[i, j + 1]:
game_score += 2 * grid[i, j]
grid[i, j] *= 2
grid[i, j + 1] = 0
# simple move
for i in range(4):
for j in range(3):
if grid[i, j] == 0:
grid[i, j] = grid[i, j + 1]
grid[i, j + 1] = 0
# rotate back
if action == 0:
grid = np.rot90(grid, k=3)
elif action == 1:
grid = np.rot90(grid, k=2)
elif action == 2:
grid = np.rot90(grid)
move_flag = np.any(old_grid != grid)
return grid, move_flag, game_score
# # Define generate function, randomly generate 2 or 4 in an empty location
def generate(grid: np.array) -> np.array:
number = np.random.choice([2, 4], p=[0.9, 0.1])
# get empty location
empty = np.where(grid == 0)
# random select one
index = np.random.randint(len(empty[0]))
# set new number
grid[empty[0][index], empty[1][index]] = number
# return new grid
return grid |