{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.5312188863754272, "min": 1.340819239616394, "max": 3.2957217693328857, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 27733.4375, "min": 4395.8408203125, "max": 135703.703125, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 82.38983050847457, "min": 39.13709677419355, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19444.0, "min": 164.0, "max": 31968.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1630.5894777256385, "min": 1181.5905922473478, "max": 1650.7439009494178, "count": 4985 }, "SoccerTwos.Self-play.ELO.sum": { "value": 192409.55837162535, "min": 2364.785459149737, "max": 384814.0138152831, "count": 4985 }, "SoccerTwos.Step.mean": { "value": 49999939.0, "min": 9968.0, "max": 49999939.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999939.0, "min": 9968.0, "max": 49999939.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.017107047140598297, "min": -0.14119431376457214, "max": 0.21114686131477356, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -2.0186314582824707, "min": -25.456342697143555, "max": 21.229524612426758, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.01714133657515049, "min": -0.14103247225284576, "max": 0.2098654806613922, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -2.0226776599884033, "min": -25.710233688354492, "max": 20.74159049987793, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.010835595555224661, "min": -0.7297473683169013, "max": 0.471263162791729, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.27860027551651, "min": -60.449599623680115, "max": 58.70319998264313, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.010835595555224661, "min": -0.7297473683169013, "max": 0.471263162791729, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.27860027551651, "min": -60.449599623680115, "max": 58.70319998264313, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.020745757979845316, "min": 0.00981990631359319, "max": 0.026678954934080443, "count": 2421 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.020745757979845316, "min": 0.00981990631359319, "max": 0.026678954934080443, "count": 2421 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.07859083066383997, "min": 6.963343742730407e-05, "max": 0.13148470247785252, "count": 2421 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.07859083066383997, "min": 6.963343742730407e-05, "max": 0.13148470247785252, "count": 2421 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.07936860620975494, "min": 7.014683821277382e-05, "max": 0.1338403267165025, "count": 2421 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.07936860620975494, "min": 7.014683821277382e-05, "max": 0.1338403267165025, "count": 2421 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2421 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2421 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 2421 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 2421 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 2421 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 2421 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1728814196", "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", "command_line_arguments": "/home/nagi/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.1+cu121", "numpy_version": "1.23.5", "end_time_seconds": "1728886394" }, "total": 72197.922459742, "count": 1, "self": 1.0210029189911438, "children": { "run_training.setup": { "total": 0.012210681000169643, "count": 1, "self": 0.012210681000169643 }, "TrainerController.start_learning": { "total": 72196.889246142, "count": 1, "self": 51.1761235750746, "children": { "TrainerController._reset_env": { "total": 4.610870969944699, "count": 250, "self": 4.610870969944699 }, "TrainerController.advance": { "total": 72141.01020455398, "count": 3429249, "self": 52.07689644732454, "children": { "env_step": { "total": 57992.53542821247, "count": 3429249, "self": 40204.73437785148, "children": { "SubprocessEnvManager._take_step": { "total": 17756.30232411364, "count": 3429249, "self": 296.7708337275253, "children": { "TorchPolicy.evaluate": { "total": 17459.531490386114, "count": 6286026, "self": 17459.531490386114 } } }, "workers": { "total": 31.49872624735235, "count": 3429249, "self": 0.0, "children": { "worker_root": { "total": 72101.60515642047, "count": 3429249, "is_parallel": true, "self": 36896.33861923412, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0021235510000678914, "count": 2, "is_parallel": true, "self": 0.00090454100018178, "children": { "_process_rank_one_or_two_observation": { "total": 0.0012190099998861115, "count": 8, "is_parallel": true, "self": 0.0012190099998861115 } } }, "UnityEnvironment.step": { "total": 0.01977080400001796, "count": 1, "is_parallel": true, "self": 0.00034734999985630566, "children": { "UnityEnvironment._generate_step_input": { "total": 0.0002519260001463408, "count": 1, "is_parallel": true, "self": 0.0002519260001463408 }, "communicator.exchange": { "total": 0.01814612099997248, "count": 1, "is_parallel": true, "self": 0.01814612099997248 }, "steps_from_proto": { "total": 0.0010254070000428328, "count": 2, "is_parallel": true, "self": 0.00024765400007709104, "children": { "_process_rank_one_or_two_observation": { "total": 0.0007777529999657418, "count": 8, "is_parallel": true, "self": 0.0007777529999657418 } } } } } } }, "UnityEnvironment.step": { "total": 35205.030267951384, "count": 3429248, "is_parallel": true, "self": 1034.706717121553, "children": { "UnityEnvironment._generate_step_input": { "total": 635.1617354733712, "count": 3429248, "is_parallel": true, "self": 635.1617354733712 }, "communicator.exchange": { "total": 30483.28878681977, "count": 3429248, "is_parallel": true, "self": 30483.28878681977 }, "steps_from_proto": { "total": 3051.8730285366937, "count": 6858496, "is_parallel": true, "self": 688.1159350191438, "children": { "_process_rank_one_or_two_observation": { "total": 2363.75709351755, "count": 27433984, "is_parallel": true, "self": 2363.75709351755 } } } } }, "steps_from_proto": { "total": 0.23626923496658492, "count": 498, "is_parallel": true, "self": 0.054451374094924176, "children": { "_process_rank_one_or_two_observation": { "total": 0.18181786087166074, "count": 1992, "is_parallel": true, "self": 0.18181786087166074 } } } } } } } } }, "trainer_advance": { "total": 14096.397879894184, "count": 3429249, "self": 352.32939927972257, "children": { "process_trajectory": { "total": 7242.712258473682, "count": 3429249, "self": 7233.037569496666, "children": { "RLTrainer._checkpoint": { "total": 9.674688977015194, "count": 100, "self": 9.674688977015194 } } }, "_update_policy": { "total": 6501.35622214078, "count": 2421, "self": 3916.9756206946913, "children": { "TorchPOCAOptimizer.update": { "total": 2584.380601446089, "count": 72642, "self": 2584.380601446089 } } } } } } }, "trainer_threads": { "total": 4.869943950325251e-07, "count": 1, "self": 4.869943950325251e-07 }, "TrainerController._save_models": { "total": 0.09204655600478873, "count": 1, "self": 0.0009647410042816773, "children": { "RLTrainer._checkpoint": { "total": 0.09108181500050705, "count": 1, "self": 0.09108181500050705 } } } } } } }