File size: 6,236 Bytes
05c9ac2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
using System;
using UnityEngine;
using Unity.MLAgents;
namespace Unity.MLAgentsExamples
{
/// <summary>
/// State of the "game" when showing all steps of the simulation. This is only used outside of training.
/// The state diagram is
///
/// | <--------------------------------------- ^
/// | |
/// v |
/// +--------+ +-------+ +-----+ +------+
/// |Find | ---> |Clear | ---> |Drop | ---> |Fill |
/// |Matches | |Matched| | | |Empty |
/// +--------+ +-------+ +-----+ +------+
///
/// | ^
/// | |
/// v |
///
/// +--------+
/// |Wait for|
/// |Move |
/// +--------+
///
/// The stats advances each "MoveTime" seconds.
/// </summary>
enum State
{
/// <summary>
/// Guard value, should never happen.
/// </summary>
Invalid = -1,
/// <summary>
/// Look for matches. If there are matches, the next state is ClearMatched, otherwise WaitForMove.
/// </summary>
FindMatches = 0,
/// <summary>
/// Remove matched cells and replace them with a placeholder value.
/// </summary>
ClearMatched = 1,
/// <summary>
/// Move cells "down" to fill empty space.
/// </summary>
Drop = 2,
/// <summary>
/// Replace empty cells with new random values.
/// </summary>
FillEmpty = 3,
/// <summary>
/// Request a move from the Agent.
/// </summary>
WaitForMove = 4,
}
public class Match3Agent : Agent
{
[HideInInspector]
public Match3Board Board;
public float MoveTime = 1.0f;
public int MaxMoves = 500;
State m_CurrentState = State.WaitForMove;
float m_TimeUntilMove;
private int m_MovesMade;
private ModelOverrider m_ModelOverrider;
private const float k_RewardMultiplier = 0.01f;
protected override void Awake()
{
base.Awake();
Board = GetComponent<Match3Board>();
m_ModelOverrider = GetComponent<ModelOverrider>();
}
public override void OnEpisodeBegin()
{
base.OnEpisodeBegin();
Board.UpdateCurrentBoardSize();
Board.InitSettled();
m_CurrentState = State.FindMatches;
m_TimeUntilMove = MoveTime;
m_MovesMade = 0;
}
private void FixedUpdate()
{
// Make a move every step if we're training, or we're overriding models in CI.
var useFast = Academy.Instance.IsCommunicatorOn || (m_ModelOverrider != null && m_ModelOverrider.HasOverrides);
if (useFast)
{
FastUpdate();
}
else
{
AnimatedUpdate();
}
// We can't use the normal MaxSteps system to decide when to end an episode,
// since different agents will make moves at different frequencies (depending on the number of
// chained moves). So track a number of moves per Agent and manually interrupt the episode.
if (m_MovesMade >= MaxMoves)
{
EpisodeInterrupted();
}
}
void FastUpdate()
{
while (true)
{
var hasMatched = Board.MarkMatchedCells();
if (!hasMatched)
{
break;
}
var pointsEarned = Board.ClearMatchedCells();
AddReward(k_RewardMultiplier * pointsEarned);
Board.DropCells();
Board.FillFromAbove();
}
while (!HasValidMoves())
{
// Shuffle the board until we have a valid move.
Board.InitSettled();
}
RequestDecision();
m_MovesMade++;
}
void AnimatedUpdate()
{
m_TimeUntilMove -= Time.deltaTime;
if (m_TimeUntilMove > 0.0f)
{
return;
}
m_TimeUntilMove = MoveTime;
State nextState;
switch (m_CurrentState)
{
case State.FindMatches:
var hasMatched = Board.MarkMatchedCells();
nextState = hasMatched ? State.ClearMatched : State.WaitForMove;
if (nextState == State.WaitForMove)
{
m_MovesMade++;
}
break;
case State.ClearMatched:
var pointsEarned = Board.ClearMatchedCells();
AddReward(k_RewardMultiplier * pointsEarned);
nextState = State.Drop;
break;
case State.Drop:
Board.DropCells();
nextState = State.FillEmpty;
break;
case State.FillEmpty:
Board.FillFromAbove();
nextState = State.FindMatches;
break;
case State.WaitForMove:
while (true)
{
// Shuffle the board until we have a valid move.
bool hasMoves = HasValidMoves();
if (hasMoves)
{
break;
}
Board.InitSettled();
}
RequestDecision();
nextState = State.FindMatches;
break;
default:
throw new ArgumentOutOfRangeException();
}
m_CurrentState = nextState;
}
bool HasValidMoves()
{
foreach (var unused in Board.ValidMoves())
{
return true;
}
return false;
}
}
}
|