ppo-Pyramids-Training / ml-agents /tests /yamato /training_int_tests.py

Second Push

05c9ac2 over 1 year ago

6.67 kB

	import argparse
	import json
	import os
	import shutil
	import sys
	import subprocess
	import time

	from .yamato_utils import (
	find_executables,
	get_base_path,
	get_base_output_path,
	run_standalone_build,
	init_venv,
	override_config_file,
	checkout_csharp_version,
	undo_git_checkout,
	)


	def _build_player_for_csharp_version(csharp_version: str, base_path: str) -> bool:
	    """Build a standalone player from ``csharp_version`` next to the existing one.

	    The player currently at ``testPlayer.app`` is moved aside for the duration
	    of the build and is always moved back afterwards, including when the
	    checkout or the build fails or raises. On success the new build is left at
	    ``testPlayer_{csharp_version}.app``.

	    Args:
	        csharp_version: Version handed to ``checkout_csharp_version`` and used
	            to name the resulting player.
	        base_path: Path handed to ``run_standalone_build``.

	    Returns:
	        True if the build returned 0 and the new player was renamed into place,
	        False if the build returned a non-zero code.
	    """
	    # We can't rely on the old C# code recognizing the commandline argument to set the output
	    # So rename testPlayer (containing the most recent build) to something else temporarily
	    artifact_path = get_base_output_path()
	    full_player_path = os.path.join(artifact_path, "testPlayer.app")
	    temp_player_path = os.path.join(artifact_path, "temp_testPlayer.app")
	    final_player_path = os.path.join(
	        artifact_path, f"testPlayer_{csharp_version}.app"
	    )

	    os.rename(full_player_path, temp_player_path)
	    try:
	        checkout_csharp_version(csharp_version)
	        build_returncode = run_standalone_build(base_path)

	        if build_returncode != 0:
	            print(f"Standalone build FAILED! with return code {build_returncode}")
	            return False

	        # Move the newly-built executable out of the way of the original one.
	        os.rename(full_player_path, final_player_path)
	        return True
	    finally:
	        # Anything still sitting at testPlayer.app here is the output of a
	        # failed build; drop it so the original player can be put back.
	        if os.path.isdir(full_player_path):
	            shutil.rmtree(full_player_path, ignore_errors=True)
	        elif os.path.exists(full_player_path):
	            os.remove(full_player_path)
	        os.rename(temp_player_path, full_player_path)


	def run_training(python_version: str, csharp_version: str) -> bool:
	    """Run a short ``mlagents-learn`` session on 3DBall and check its output.

	    Args:
	        python_version: Forwarded to ``init_venv``; ``None`` is reported as
	            "latest" in the log line.
	        csharp_version: When not ``None``, that C# version is checked out and a
	            standalone player is built from it. When ``None``, the existing
	            ``testPlayer`` is used.

	    Returns:
	        True if training exited with code 0 and the expected ``3DBall.onnx``
	        exists (and, when both versions are ``None``, the inference run also
	        succeeded). False on any failure.
	    """
	    latest = "latest"
	    # Millisecond timestamp keeps the results directory unique per run.
	    run_id = int(time.time() * 1000.0)
	    print(
	        f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
	    )
	    output_dir = "results"
	    onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"

	    if os.path.exists(onnx_file_expected):
	        # Should never happen - make sure nothing leftover from an old test.
	        print("Artifacts from previous build found!")
	        return False

	    base_path = get_base_path()
	    print(f"Running in base path {base_path}")

	    # Only build the standalone player if we're overriding the C# version
	    # Otherwise we'll use the one built earlier in the pipeline.
	    if csharp_version is not None:
	        if not _build_player_for_csharp_version(csharp_version, base_path):
	            return False
	        standalone_player_path = f"testPlayer_{csharp_version}"
	    else:
	        standalone_player_path = "testPlayer"

	    init_venv(python_version)

	    # Copy the default training config but override the max_steps parameter,
	    # and reduce the batch_size and buffer_size enough to ensure an update step happens.
	    yaml_out = "override.yaml"
	    overrides = {
	        "hyperparameters": {"batch_size": 10, "buffer_size": 10},
	        "max_steps": 100,
	    }
	    override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)

	    log_output_path = f"{get_base_output_path()}/training.log"
	    env_path = os.path.join(get_base_output_path(), standalone_player_path)
	    mla_learn_cmd = [
	        "mlagents-learn",
	        yaml_out,
	        "--force",
	        "--env",
	        env_path,
	        "--run-id",
	        str(run_id),
	        "--no-graphics",
	        "--env-args",
	        "-logFile",
	        log_output_path,
	    ]

	    res = subprocess.run(mla_learn_cmd)

	    # Save models as artifacts (only if we're using latest python and C#)
	    if csharp_version is None and python_version is None:
	        model_artifacts_dir = os.path.join(get_base_output_path(), "models")
	        os.makedirs(model_artifacts_dir, exist_ok=True)
	        if os.path.exists(onnx_file_expected):
	            shutil.copy(onnx_file_expected, model_artifacts_dir)

	    if res.returncode != 0 or not os.path.exists(onnx_file_expected):
	        print("mlagents-learn run FAILED!")
	        print("Command line: " + " ".join(mla_learn_cmd))
	        # Dump the player log to help diagnose the failure.
	        subprocess.run(["cat", log_output_path])
	        return False

	    if csharp_version is None and python_version is None:
	        # Use abs path so that loading doesn't get confused
	        model_path = os.path.abspath(os.path.dirname(onnx_file_expected))
	        inference_ok = run_inference(env_path, model_path, "onnx")
	        if not inference_ok:
	            return False

	    print("mlagents-learn run SUCCEEDED!")
	    return True


	def run_inference(env_path: str, output_path: str, model_extension: str) -> bool:
	    """Run the standalone player with a trained model and check it reports rewards.

	    Args:
	        env_path: Location passed to ``find_executables`` to locate the player;
	            exactly one executable must be found.
	        output_path: Directory passed to the player as
	            ``--mlagents-override-model-directory``.
	        model_extension: Extension passed as
	            ``--mlagents-override-model-extension``; also used in the name of
	            the inference log file.

	    Returns:
	        True if the player exited with code 0 within the timeout and the timer
	        file contains a max value for ``Override_3DBall.CumulativeReward``.
	        False on any failure, including a timeout or an unreadable timer file.
	    """
	    start_time = time.time()
	    exes = find_executables(env_path)
	    if len(exes) != 1:
	        print(f"Can't determine the player executable in {env_path}. Found {exes}.")
	        return False

	    log_output_path = f"{get_base_output_path()}/inference.{model_extension}.txt"

	    # 10 minutes for inference is more than enough
	    process_timeout = 10 * 60
	    # Try to gracefully exit a few seconds before that.
	    model_override_timeout = process_timeout - 15

	    exe_path = exes[0]
	    args = [
	        exe_path,
	        "-nographics",
	        "-batchmode",
	        "-logfile",
	        log_output_path,
	        "--mlagents-override-model-directory",
	        output_path,
	        "--mlagents-quit-on-load-failure",
	        "--mlagents-quit-after-episodes",
	        "1",
	        "--mlagents-override-model-extension",
	        model_extension,
	        "--mlagents-quit-after-seconds",
	        str(model_override_timeout),
	    ]
	    print(f"Starting inference with args {' '.join(args)}")
	    try:
	        res = subprocess.run(args, timeout=process_timeout)
	    except subprocess.TimeoutExpired:
	        # subprocess.run() kills the child before raising, so treat a hung
	        # player as an ordinary failure and still show its log.
	        print(f"Inference timed out after {process_timeout} seconds!")
	        print("Command line: " + " ".join(args))
	        subprocess.run(["cat", log_output_path])
	        return False
	    end_time = time.time()
	    if res.returncode != 0:
	        print("Error running inference!")
	        print("Command line: " + " ".join(args))
	        subprocess.run(["cat", log_output_path])
	        return False
	    print(f"Inference finished! Took {end_time - start_time} seconds")

	    # Check the artifacts directory for the timers, so we can get the gauges
	    timer_file = f"{exe_path}_Data/ML-Agents/Timers/3DBall_timers.json"
	    try:
	        with open(timer_file) as f:
	            timer_data = json.load(f)
	    except (OSError, ValueError) as err:
	        # A missing or malformed timer file (json.JSONDecodeError is a
	        # ValueError) is a failed check, not a reason to crash the script.
	        print(f"Unable to read timer file {timer_file}: {err}")
	        return False

	    gauges = timer_data.get("gauges", {})
	    rewards = gauges.get("Override_3DBall.CumulativeReward", {})
	    max_reward = rewards.get("max")
	    if max_reward is None:
	        print(
	            "Unable to find rewards in timer file. This usually indicates a problem with Barracuda or inference."
	        )
	        return False
	    # We could check that the rewards are over a threshold, but since we train for so short a time,
	    # the values could be highly variable. So don't do it for now.

	    return True


	def main():
	    """Parse ``--python`` / ``--csharp`` and run the training integration test.

	    Exits with status 1 when ``run_training`` reports failure. The git
	    checkout is undone in every case.
	    """
	    arg_parser = argparse.ArgumentParser()
	    # Both version overrides are optional and default to None.
	    for flag in ("--python", "--csharp"):
	        arg_parser.add_argument(flag, default=None)
	    options = arg_parser.parse_args()

	    try:
	        if not run_training(options.python, options.csharp):
	            sys.exit(1)
	    finally:
	        # Cleanup - this gets executed even if we hit sys.exit()
	        undo_git_checkout()


	# Script entry point: only run the integration test when executed directly.
	if __name__ == "__main__":
	main()