from mlagents_envs.logging_util import get_logger
from typing import Deque, Dict
from collections import deque
from mlagents.trainers.ghost.trainer import GhostTrainer

logger = get_logger(__name__)


class GhostController:
    """
    GhostController contains a queue of team ids. GhostTrainers subscribe to the
    GhostController and query it to get the current learning team. The
    GhostController cycles through team ids every 'swap_interval', the number of
    trainer steps between changes of the learning team.
    The GhostController is a unique object; there can be only one per training run.
    """

    def __init__(self, maxlen: int = 10):
        """
        Create a GhostController.
        :param maxlen: Maximum number of GhostTrainers allowed in this GhostController
        """
        self._queue: Deque[int] = deque(maxlen=maxlen)
        self._learning_team: int = -1
        # Maps team id to its GhostTrainer; used later to query opponent ELO ratings
        self._ghost_trainers: Dict[int, GhostTrainer] = {}
        # Set on a team swap; consumed by should_reset to trigger a full reset
        self._changed_training_team = False

    @property
    def get_learning_team(self) -> int:
        """
        Returns the current learning team.
        :return: The learning team id
        """
        return self._learning_team

    def should_reset(self) -> bool:
        """
        Whether a team change occurred, which causes a full reset in
        trainer_controller. The flag is cleared once it has been read.
        :return: The truth value of the team changing
        """
        changed_team = self._changed_training_team
        if self._changed_training_team:
            self._changed_training_team = False
        return changed_team

    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
        """
        Given a team_id and trainer, add them to the queue and to the trainer
        registry if not already present. The GhostTrainer is used later by the
        controller to get ELO ratings of agents.
        :param team_id: The team_id of an agent managed by this GhostTrainer
        :param trainer: A GhostTrainer that manages this team_id
        """
        if team_id not in self._ghost_trainers:
            self._ghost_trainers[team_id] = trainer
            # The first team to subscribe becomes the learning team; all others
            # wait in the queue until change_training_team rotates them in.
            if self._learning_team < 0:
                self._learning_team = team_id
            else:
                self._queue.append(team_id)

    def change_training_team(self, step: int) -> None:
        """
        The current learning team is added to the end of the queue and then
        updated with the next in line.
        :param step: The step of the trainer, for debugging
        """
        self._queue.append(self._learning_team)
        self._learning_team = self._queue.popleft()
        logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
        self._changed_training_team = True

    def compute_elo_rating_changes(self, rating: float, result: float) -> float:
        """
        Calculates the ELO change given the rating of the learning team and the
        result of the game. The GhostController queries the other GhostTrainers
        for the ELO of the agent each is currently deploying, which could be the
        current agent or a past snapshot.
        :param rating: Rating of the learning team.
        :param result: Win, loss, or draw from the perspective of the learning team.
        :return: The change in ELO.
        """
        opponent_rating: float = 0.0
        for team_id, trainer in self._ghost_trainers.items():
            if team_id != self._learning_team:
                opponent_rating = trainer.get_opponent_elo()
        r1 = pow(10, rating / 400)
        r2 = pow(10, opponent_rating / 400)

        # Expected score of the learning team under the ELO model
        summed = r1 + r2
        e1 = r1 / summed

        # Actual result minus expected score; positive when the learning team
        # over-performs its rating
        change = result - e1
        for team_id, trainer in self._ghost_trainers.items():
            if team_id != self._learning_team:
                trainer.change_opponent_elo(change)

        return change
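

# Worked example of the update above, with illustrative numbers: for a
# learning-team rating of 1200 against an opponent rating of 1000,
# r1 = 10 ** (1200 / 400) = 1000 and r2 = 10 ** (1000 / 400) ≈ 316.23, so the
# expected score is e1 = 1000 / 1316.23 ≈ 0.76. A win (result = 1.0) then
# returns a change of 1.0 - 0.76 ≈ 0.24. Note that the raw difference is
# returned; a conventional Elo implementation would also scale it by a K-factor.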
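

# A minimal usage sketch, assuming a hypothetical stand-in for GhostTrainer
# (a real GhostTrainer requires a full training setup). The stub implements
# only the two methods the controller calls; the sign convention in
# change_opponent_elo is an assumption for illustration, not taken from the
# ML-Agents source.
if __name__ == "__main__":

    class _StubTrainer:
        def __init__(self, elo: float = 1200.0) -> None:
            self._opponent_elo = elo

        def get_opponent_elo(self) -> float:
            return self._opponent_elo

        def change_opponent_elo(self, change: float) -> None:
            # Assumed: the opponent's rating moves opposite to the learning team's
            self._opponent_elo -= change

    controller = GhostController(maxlen=10)
    controller.subscribe_team_id(0, _StubTrainer())  # first subscriber learns first
    controller.subscribe_team_id(1, _StubTrainer())
    print(controller.get_learning_team)  # 0

    controller.change_training_team(step=5000)
    print(controller.get_learning_team)  # 1
    print(controller.should_reset())  # True once after a swap...
    print(controller.should_reset())  # ...then False until the next swap

    # Equal ratings give an expected score of 0.5, so a win yields +0.5
    delta = controller.compute_elo_rating_changes(rating=1200.0, result=1.0)
    print(f"ELO change: {delta:.3f}")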