File size: 1,751 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os.path as osp
import yaml
import numpy as np
import torch
from .football_ikki import Environment
from .handyrl_core.model import load_model
# Load the trained policy checkpoint and the environment config that ship
# alongside this module (paths are resolved relative to this file so the
# agent works regardless of the caller's working directory).
model_path = osp.join(osp.dirname(__file__), 'models/1679.pth')
# Explicit encoding: the default is locale-dependent and can break on
# non-UTF-8 systems.
with open(osp.join(osp.dirname(__file__), 'config.yaml'), encoding='utf-8') as f:
    config = yaml.safe_load(f)
env = Environment(config['env_args'])
# env.net() returns the network class; instantiate it on the env, then load
# the trained weights into it.
model = load_model(env.net()(env), model_path)
model.eval()
def output_think(env, obs, actions, p, v, r):
    """Debug helper: print the model's view of the current step.

    Masks illegal actions out of the policy logits, then prints the first
    player's sticky actions, the legal action list, the masked softmax
    policy scaled to integers (per-mille), the value head output and the
    reward estimate. Returns None; output goes to stdout.

    env is accepted for signature symmetry with the caller but unused here.
    """
    # Push logits of illegal actions far below the legal ones so that
    # softmax assigns them effectively zero probability.
    pmask = np.ones_like(p)
    pmask[actions] = 0
    p = p - pmask * 1e32

    def softmax(x):
        # keepdims=True so this also works for batched (2-D) logits,
        # not just a 1-D vector; subtracting the max is the standard
        # numerical-stability trick.
        x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return x / x.sum(axis=-1, keepdims=True)

    sticky_actions = obs['players_raw'][0]['sticky_actions']
    print(sticky_actions)
    print(actions)
    print((softmax(p) * 1000).astype(int))
    print(v)
    print(r)
# Persistent agent state across calls to agent():
# prev_action     -- the primitive action submitted on the previous step,
#                    fed back into the mirrored environment each call.
# reserved_action -- when a "special" action splits into two primitive
#                    steps, the second step is held here and emitted on
#                    the next call (None when nothing is pending).
prev_action = 0
reserved_action = None
def agent(obs):
    """Return the next action for this step as a single-element int list."""
    global prev_action, reserved_action

    # Replay our previous action into the mirrored environment so its
    # internal state keeps tracking the real game.
    env.play_info([{'observation': obs, 'action': [prev_action]}, None])

    # Run the policy network on the current observation and restrict the
    # choice to the legal actions.
    features = env.observation(0)
    policy, value, reward, _ = model.inference(features, None)
    legal = env.legal_actions(0)

    # Greedy choice: the legal action with the highest policy logit
    # (ties resolved in favor of the earliest legal action).
    best = max(legal, key=lambda a: policy[a])

    if reserved_action is None:
        # A "special" action may decompose into two primitive steps:
        # play the first now and hold the second for the next call.
        prev_action, reserved_action = env.special_to_actions(best)
    else:
        # Emit the second half of a previously split action.
        prev_action, reserved_action = reserved_action, None

    # The caller expects a list containing a single int action id.
    return [prev_action]
|