{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.401779890060425, "min": 2.401779890060425, "max": 3.1004796028137207, "count": 7 }, "SoccerTwos.Policy.Entropy.sum": { "value": 33970.7734375, "min": 26474.041015625, "max": 99215.34375, "count": 7 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 698.0, "min": 641.5, "max": 999.0, "count": 7 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 8376.0, "min": 8376.0, "max": 31968.0, "count": 7 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1200.2478475868106, "min": 1200.2478475868106, "max": 1201.9913820042966, "count": 5 }, "SoccerTwos.Self-play.ELO.sum": { "value": 2400.4956951736212, "min": 2400.4956951736212, "max": 7211.94829202578, "count": 5 }, "SoccerTwos.Step.mean": { "value": 79884.0, "min": 9742.0, "max": 79884.0, "count": 8 }, "SoccerTwos.Step.sum": { "value": 79884.0, "min": 9742.0, "max": 79884.0, "count": 8 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.018005089834332466, "min": -0.042664702981710434, "max": -0.018005089834332466, "count": 8 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.2160610854625702, "min": -0.511976420879364, "max": -0.20499663054943085, "count": 8 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.01888716034591198, "min": -0.042664702981710434, "max": -0.01888716034591198, "count": 8 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.22664591670036316, "min": -0.511976420879364, "max": -0.22105064988136292, "count": 8 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 8 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 8 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.16666666666666666, "min": -0.36363636363636365, "max": 0.22012308010688195, "count": 8 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -2.0, "min": -4.0, "max": 2.8616000413894653, "count": 8 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.16666666666666666, "min": -0.36363636363636365, "max": 0.22012308010688195, "count": 8 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -2.0, "min": -4.0, "max": 2.8616000413894653, "count": 8 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 8 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 8 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.1284444789801325, "min": 0.054607713263895775, "max": 0.1284444789801325, "count": 2 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.1284444789801325, "min": 0.054607713263895775, "max": 0.1284444789801325, "count": 2 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0013965409792338808, "min": 0.0013965409792338808, "max": 6161.4450659125305, "count": 2 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0013965409792338808, "min": 0.0013965409792338808, "max": 6161.4450659125305, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.001396397186908871, "min": 0.001396397186908871, "max": 6139.078216775525, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.001396397186908871, "min": 0.001396397186908871, "max": 6139.078216775525, "count": 2 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.19999999999999996, "min": 0.19999999999999993, "max": 0.19999999999999996, "count": 2 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.19999999999999996, "min": 0.19999999999999993, "max": 0.19999999999999996, "count": 2 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005, "min": 0.005, "max": 0.005000000000000001, "count": 2 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005, "min": 0.005, "max": 0.005000000000000001, "count": 2 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1722425502", "python_version": "3.10.14 | packaged by Anaconda, Inc. | (main, May 6 2024, 19:44:50) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\rueed\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1722426553" }, "total": 1051.7074853, "count": 1, "self": 2.180884399999968, "children": { "run_training.setup": { "total": 0.1491899000000103, "count": 1, "self": 0.1491899000000103 }, "TrainerController.start_learning": { "total": 1049.377411, "count": 1, "self": 0.24202630000013414, "children": { "TrainerController._reset_env": { "total": 25.11751940000005, "count": 1, "self": 25.11751940000005 }, "TrainerController.advance": { "total": 1023.7370215, "count": 5734, "self": 0.2180017000054022, "children": { "env_step": { "total": 162.62429370000405, "count": 5734, "self": 112.44564299999945, "children": { "SubprocessEnvManager._take_step": { "total": 50.042818900001635, "count": 5734, "self": 1.5218775999993568, "children": { "TorchPolicy.evaluate": { "total": 48.52094130000228, "count": 11442, "self": 48.52094130000228 } } }, "workers": { "total": 0.13583180000296124, "count": 5734, "self": 0.0, "children": { "worker_root": { "total": 958.3195445999987, "count": 5734, "is_parallel": true, "self": 875.3747018000018, "children": { "steps_from_proto": { "total": 0.003804100000024846, "count": 2, "is_parallel": true, "self": 0.0008019999999078209, "children": { "_process_rank_one_or_two_observation": { "total": 0.003002100000117025, "count": 8, "is_parallel": true, "self": 0.003002100000117025 } } }, "UnityEnvironment.step": { "total": 82.94103869999685, "count": 5734, "is_parallel": true, "self": 3.7347740000009253, "children": { "UnityEnvironment._generate_step_input": { "total": 3.2139034000061883, "count": 5734, "is_parallel": true, "self": 3.2139034000061883 }, "communicator.exchange": { "total": 64.07419309999659, "count": 5734, "is_parallel": true, "self": 64.07419309999659 }, "steps_from_proto": { "total": 11.918168199993147, "count": 11468, "is_parallel": true, "self": 2.398435099986841, "children": { "_process_rank_one_or_two_observation": { "total": 9.519733100006306, "count": 45872, "is_parallel": true, "self": 9.519733100006306 } } } } } } } } } } }, "trainer_advance": { "total": 860.8947260999905, "count": 5734, "self": 1.2665090999859103, "children": { "process_trajectory": { "total": 102.20595260000448, "count": 5734, "self": 102.20595260000448 }, "_update_policy": { "total": 757.4222644000001, "count": 3, "self": 13.285107900000412, "children": { "TorchPOCAOptimizer.update": { "total": 744.1371564999997, "count": 105, "self": 744.1371564999997 } } } } } } }, "trainer_threads": { "total": 2.0999998469051206e-06, "count": 1, "self": 2.0999998469051206e-06 }, "TrainerController._save_models": { "total": 0.2808416999998826, "count": 1, "self": 0.02642559999981131, "children": { "RLTrainer._checkpoint": { "total": 0.2544161000000713, "count": 1, "self": 0.2544161000000713 } } } } } } }