{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.2734103202819824, "min": 3.2734103202819824, "max": 3.2957420349121094, "count": 7 }, "SoccerTwos.Policy.Entropy.sum": { "value": 81599.5703125, "min": 14411.767578125, "max": 105463.6875, "count": 7 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 999.0, "min": 679.2, "max": 999.0, "count": 7 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19980.0, "min": 13584.0, "max": 27228.0, "count": 7 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1199.4040698282472, "min": 1199.4040698282472, "max": 1201.2421091626256, "count": 5 }, "SoccerTwos.Self-play.ELO.sum": { "value": 7196.424418969484, "min": 2399.4648566222563, "max": 7207.452654975754, "count": 5 }, "SoccerTwos.Step.mean": { "value": 69280.0, "min": 9630.0, "max": 69280.0, "count": 7 }, "SoccerTwos.Step.sum": { "value": 69280.0, "min": 9630.0, "max": 69280.0, "count": 7 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.05819521099328995, "min": -0.08827058225870132, "max": -0.05788181349635124, "count": 7 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.5819520950317383, "min": -1.1475175619125366, "max": -0.5819520950317383, "count": 7 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.05953459069132805, "min": -0.08832216262817383, "max": -0.05901345610618591, "count": 7 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.5953459143638611, "min": -1.148148536682129, "max": -0.5953459143638611, "count": 7 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 7 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 7 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.1, "min": -0.46153846153846156, "max": 0.12713332970937094, "count": 7 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.0, "min": -6.0, "max": 1.5255999565124512, "count": 7 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.1, "min": -0.46153846153846156, "max": 0.12713332970937094, "count": 7 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.0, "min": -6.0, "max": 1.5255999565124512, "count": 7 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 7 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 7 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.017378868327553695, "min": 0.012821569272394603, "max": 0.017378868327553695, "count": 2 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.017378868327553695, "min": 0.012821569272394603, "max": 0.017378868327553695, "count": 2 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0006007445845170878, "min": 0.0006007445845170878, "max": 0.0034726805881493622, "count": 2 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0006007445845170878, "min": 0.0006007445845170878, "max": 0.0034726805881493622, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0006070454158664992, "min": 0.0006070454158664992, "max": 0.003050728494094478, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0006070454158664992, "min": 0.0006070454158664992, "max": 0.003050728494094478, "count": 2 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 2 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 2 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1707167402", "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:34:57) [MSC v.1936 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\HaoLin\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1707167589" }, "total": 187.0672854000004, "count": 1, "self": 0.018947499978821725, "children": { "run_training.setup": { "total": 0.2504971000016667, "count": 1, "self": 0.2504971000016667 }, "TrainerController.start_learning": { "total": 186.7978408000199, "count": 1, "self": 0.13684080087114125, "children": { "TrainerController._reset_env": { "total": 7.83422249997966, "count": 1, "self": 7.83422249997966 }, "TrainerController.advance": { "total": 178.53035289916443, "count": 4990, "self": 0.1473043977166526, "children": { "env_step": { "total": 100.7011244999594, "count": 4990, "self": 71.37111080193426, "children": { "SubprocessEnvManager._take_step": { "total": 29.24249519931618, "count": 4990, "self": 0.9942253968911245, "children": { "TorchPolicy.evaluate": { "total": 28.248269802425057, "count": 9938, "self": 28.248269802425057 } } }, "workers": { "total": 0.08751849870895967, "count": 4990, "self": 0.0, "children": { "worker_root": { "total": 179.64199290022952, "count": 4990, "is_parallel": true, "self": 126.85906480165431, "children": { "steps_from_proto": { "total": 0.0010844999924302101, "count": 2, "is_parallel": true, "self": 0.00022760004503652453, "children": { "_process_rank_one_or_two_observation": { "total": 0.0008568999473936856, "count": 8, "is_parallel": true, "self": 0.0008568999473936856 } } }, "UnityEnvironment.step": { "total": 52.78184359858278, "count": 4990, "is_parallel": true, "self": 3.026571402500849, "children": { "UnityEnvironment._generate_step_input": { "total": 3.474118596408516, "count": 4990, "is_parallel": true, "self": 3.474118596408516 }, "communicator.exchange": { "total": 36.09233499999391, "count": 4990, "is_parallel": true, "self": 36.09233499999391 }, "steps_from_proto": { "total": 10.188818599679507, "count": 9980, "is_parallel": true, "self": 2.3130016967188567, "children": { "_process_rank_one_or_two_observation": { "total": 7.875816902960651, "count": 39920, "is_parallel": true, "self": 7.875816902960651 } } } } } } } } } } }, "trainer_advance": { "total": 77.68192400148837, "count": 4990, "self": 0.7928799021756276, "children": { "process_trajectory": { "total": 14.639051599311642, "count": 4990, "self": 14.639051599311642 }, "_update_policy": { "total": 62.2499925000011, "count": 3, "self": 10.887031899765134, "children": { "TorchPOCAOptimizer.update": { "total": 51.362960600235965, "count": 66, "self": 51.362960600235965 } } } } } } }, "trainer_threads": { "total": 1.7999554984271526e-06, "count": 1, "self": 1.7999554984271526e-06 }, "TrainerController._save_models": { "total": 0.29642280004918575, "count": 1, "self": 0.007198800041805953, "children": { "RLTrainer._checkpoint": { "total": 0.2892240000073798, "count": 1, "self": 0.2892240000073798 } } } } } } }