gomoku / DI-engine /ding /league /algorithm.py
zjowowen's picture
init space
079c32c
raw
history blame
1.75 kB
import numpy as np
def pfsp(win_rates: np.ndarray, weighting: str) -> np.ndarray:
    """
    Overview:
        Prioritized Fictitious Self-Play algorithm.
        Process win_rates with a weighting function to get priority, then normalize the priorities to get
        the selection probability of each opponent. If the priorities cannot be normalized (they sum to zero),
        fall back to a uniform distribution instead of returning NaN.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
        - weighting (:obj:`str`): pfsp weighting function type, either ``'squared'`` (``(1 - x) ** 2``)
          or ``'variance'`` (``x * (1 - x)``)
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of probability at which one element is selected, shape(N)
    Raises:
        - KeyError: ``weighting`` is not one of the supported weighting function types
        - AssertionError: ``win_rates`` is not a ``np.ndarray`` or its first dimension is empty
    """
    weighting_func = {
        'squared': lambda x: (1 - x) ** 2,
        'variance': lambda x: x * (1 - x),
    }
    if weighting not in weighting_func:
        raise KeyError("invalid weighting arg: {} in pfsp".format(weighting))
    fn = weighting_func[weighting]

    assert isinstance(win_rates, np.ndarray)
    assert win_rates.shape[0] >= 1, win_rates.shape

    # np.full_like inherits the input dtype by default; for an integer/bool array that would
    # truncate 1/N to 0, so force a floating dtype while keeping an existing floating one.
    prob_dtype = win_rates.dtype if np.issubdtype(win_rates.dtype, np.floating) else np.float64
    uniform_prob = 1.0 / len(win_rates)

    # all zero win rates case, return uniform selection prob
    if win_rates.sum() < 1e-8:
        return np.full_like(win_rates, uniform_prob, dtype=prob_dtype)

    fn_win_rates = fn(win_rates)
    total = fn_win_rates.sum()
    # Every priority is zero (e.g. all win rates are 1 for 'squared', or all are exactly 0/1 for
    # 'variance'): dividing would yield NaN, so no opponent is preferred and we select uniformly.
    if total == 0:
        return np.full_like(win_rates, uniform_prob, dtype=prob_dtype)
    return fn_win_rates / total
def uniform(win_rates: np.ndarray) -> np.ndarray:
    """
    Overview:
        Uniform opponent selection algorithm. Select an opponent uniformly, regardless of historical win rates.
    Arguments:
        - win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N)
    Returns:
        - probs (:obj:`np.ndarray`): a numpy ndarray of uniform probability ``1 / N``, same shape as ``win_rates``
    """
    # np.full_like inherits the input dtype by default; for an integer/bool array that would
    # truncate 1/N to 0, so force a floating dtype while keeping an existing floating one.
    prob_dtype = win_rates.dtype if np.issubdtype(win_rates.dtype, np.floating) else np.float64
    return np.full_like(win_rates, 1.0 / len(win_rates), dtype=prob_dtype)