{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.191042900085449,
"min": 3.188009023666382,
"max": 3.295745849609375,
"count": 58
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 94659.09375,
"min": 19849.404296875,
"max": 105463.796875,
"count": 58
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 999.0,
"min": 436.8333333333333,
"max": 999.0,
"count": 58
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19980.0,
"min": 15712.0,
"max": 25080.0,
"count": 58
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1193.5186244967717,
"min": 1193.5186244967717,
"max": 1201.1043969522968,
"count": 30
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 2387.0372489935435,
"min": 2387.0372489935435,
"max": 16789.069106563373,
"count": 30
},
"SoccerTwos.Step.mean": {
"value": 579188.0,
"min": 9558.0,
"max": 579188.0,
"count": 58
},
"SoccerTwos.Step.sum": {
"value": 579188.0,
"min": 9558.0,
"max": 579188.0,
"count": 58
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.0009201146895065904,
"min": -0.027440009638667107,
"max": 0.003572183195501566,
"count": 56
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.00920114666223526,
"min": -0.3292801082134247,
"max": 0.03929401561617851,
"count": 56
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0010390093084424734,
"min": -0.029093032702803612,
"max": 0.004057199228554964,
"count": 56
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.010390093550086021,
"min": -0.34911638498306274,
"max": 0.04462919384241104,
"count": 56
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 58
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 58
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.0,
"min": -0.40045714378356934,
"max": 0.3626666704813639,
"count": 58
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 0.0,
"min": -7.565999984741211,
"max": 5.440000057220459,
"count": 58
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.0,
"min": -0.40045714378356934,
"max": 0.3626666704813639,
"count": 58
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 0.0,
"min": -7.565999984741211,
"max": 5.440000057220459,
"count": 58
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 58
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 58
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.014809232100378721,
"min": 0.011454915601643734,
"max": 0.02108337269698192,
"count": 26
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.014809232100378721,
"min": 0.011454915601643734,
"max": 0.02108337269698192,
"count": 26
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 4.602768288653654e-05,
"min": 5.892100754560185e-07,
"max": 0.005765641395313045,
"count": 25
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 4.602768288653654e-05,
"min": 5.892100754560185e-07,
"max": 0.005765641395313045,
"count": 25
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 4.030396430607652e-05,
"min": 7.071624556450236e-07,
"max": 0.00548537018088003,
"count": 25
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 4.030396430607652e-05,
"min": 7.071624556450236e-07,
"max": 0.00548537018088003,
"count": 25
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 26
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 26
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 26
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 26
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 26
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 26
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1710442538",
"python_version": "3.10.12 (main, Jul 5 2023, 15:02:25) [Clang 14.0.6 ]",
"command_line_arguments": "/Users/arthur/opt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.1",
"numpy_version": "1.23.5",
"end_time_seconds": "1710444676"
},
"total": 2138.570155499998,
"count": 1,
"self": 0.2320390830282122,
"children": {
"run_training.setup": {
"total": 0.02419662498869002,
"count": 1,
"self": 0.02419662498869002
},
"TrainerController.start_learning": {
"total": 2138.313919791981,
"count": 1,
"self": 0.47666625687270425,
"children": {
"TrainerController._reset_env": {
"total": 4.184423332975712,
"count": 3,
"self": 4.184423332975712
},
"TrainerController.advance": {
"total": 2133.5701982021274,
"count": 37794,
"self": 0.43276771568343975,
"children": {
"env_step": {
"total": 1696.3597711053735,
"count": 37794,
"self": 1617.341892782977,
"children": {
"SubprocessEnvManager._take_step": {
"total": 78.71722831411171,
"count": 37794,
"self": 2.478860310249729,
"children": {
"TorchPolicy.evaluate": {
"total": 76.23836800386198,
"count": 75047,
"self": 76.23836800386198
}
}
},
"workers": {
"total": 0.30065000828471966,
"count": 37793,
"self": 0.0,
"children": {
"worker_root": {
"total": 2134.666772092547,
"count": 37793,
"is_parallel": true,
"self": 584.650361949316,
"children": {
"steps_from_proto": {
"total": 0.005337166949175298,
"count": 6,
"is_parallel": true,
"self": 0.0008250439132098109,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004512123035965487,
"count": 24,
"is_parallel": true,
"self": 0.004512123035965487
}
}
},
"UnityEnvironment.step": {
"total": 1550.0110729762819,
"count": 37793,
"is_parallel": true,
"self": 5.278928307321621,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 28.449942821549485,
"count": 37793,
"is_parallel": true,
"self": 28.449942821549485
},
"communicator.exchange": {
"total": 1459.0250641308958,
"count": 37793,
"is_parallel": true,
"self": 1459.0250641308958
},
"steps_from_proto": {
"total": 57.25713771651499,
"count": 75586,
"is_parallel": true,
"self": 6.882369338476565,
"children": {
"_process_rank_one_or_two_observation": {
"total": 50.374768378038425,
"count": 302344,
"is_parallel": true,
"self": 50.374768378038425
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 436.7776593810704,
"count": 37793,
"self": 4.52136982275988,
"children": {
"process_trajectory": {
"total": 65.25087801538757,
"count": 37793,
"self": 65.09391884840443,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1569591669831425,
"count": 1,
"self": 0.1569591669831425
}
}
},
"_update_policy": {
"total": 367.00541154292296,
"count": 27,
"self": 38.52848369462299,
"children": {
"TorchPOCAOptimizer.update": {
"total": 328.47692784829997,
"count": 810,
"self": 328.47692784829997
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.08263200000510551,
"count": 1,
"self": 3.095800639130175e-05,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08260104199871421,
"count": 1,
"self": 0.08260104199871421
}
}
}
}
}
}
}