{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.9602681398391724, "min": 1.9359997510910034, "max": 3.295745849609375, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 40522.6640625, "min": 19985.09375, "max": 117969.046875, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 50.8125, "min": 36.422222222222224, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19512.0, "min": 14288.0, "max": 26604.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1503.7269224655618, "min": 1169.6846285210643, "max": 1511.5707702508637, "count": 489 }, "SoccerTwos.Self-play.ELO.sum": { "value": 288715.56911338784, "min": 2339.8851540969695, "max": 399068.6687831659, "count": 489 }, "SoccerTwos.Step.mean": { "value": 4999992.0, "min": 9322.0, "max": 4999992.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999992.0, "min": 9322.0, "max": 4999992.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.02795848250389099, "min": -0.08959347009658813, "max": 0.15896980464458466, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -5.36802864074707, "min": -15.858044624328613, "max": 26.78872299194336, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.026928341016173363, "min": -0.09065592288970947, "max": 0.161281019449234, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.170241355895996, "min": -16.046098709106445, "max": 26.825647354125977, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.009449998537699381, "min": -0.7803428572203431, "max": 0.49932000041007996, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.8143997192382812, "min": -67.88880002498627, "max": 58.891400307416916, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.009449998537699381, "min": -0.7803428572203431, "max": 0.49932000041007996, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.8143997192382812, "min": -67.88880002498627, "max": 58.891400307416916, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01861924359788342, "min": 0.010616997404334446, "max": 0.025824711324336627, "count": 238 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01861924359788342, "min": 0.010616997404334446, "max": 0.025824711324336627, "count": 238 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11396111970146497, "min": 0.000981652188541678, "max": 0.13157074625293413, "count": 238 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11396111970146497, "min": 0.000981652188541678, "max": 0.13157074625293413, "count": 238 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11552784765760103, "min": 0.0009974301637460789, "max": 0.13415824870268503, "count": 238 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11552784765760103, "min": 0.0009974301637460789, "max": 0.13415824870268503, "count": 238 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 238 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 238 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 238 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 238 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 238 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 238 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1706695983", "python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]", "command_line_arguments": "/Users/yl/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos2 --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.0", "numpy_version": "1.23.5", "end_time_seconds": "1706726439" }, "total": 30455.995309529128, "count": 1, "self": 0.5184705872088671, "children": { "run_training.setup": { "total": 0.045423932839185, "count": 1, "self": 0.045423932839185 }, "TrainerController.start_learning": { "total": 30455.43141500908, "count": 1, "self": 6.008981778519228, "children": { "TrainerController._reset_env": { "total": 7.72993843187578, "count": 25, "self": 7.72993843187578 }, "TrainerController.advance": { "total": 30441.476012788713, "count": 335846, "self": 6.043721565278247, "children": { "env_step": { "total": 23080.538405468455, "count": 335846, "self": 22202.69925462338, "children": { "SubprocessEnvManager._take_step": { "total": 873.8876845745835, "count": 335846, "self": 32.98369637900032, "children": { "TorchPolicy.evaluate": { "total": 840.9039881955832, "count": 636636, "self": 840.9039881955832 } } }, "workers": { "total": 3.951466270489618, "count": 335846, "self": 0.0, "children": { "worker_root": { "total": 30439.896285607945, "count": 335846, "is_parallel": true, "self": 8957.397032326553, "children": { "steps_from_proto": { "total": 0.051946949446573853, "count": 50, "is_parallel": true, "self": 0.009704955853521824, "children": { "_process_rank_one_or_two_observation": { "total": 0.04224199359305203, "count": 200, "is_parallel": true, "self": 0.04224199359305203 } } }, "UnityEnvironment.step": { "total": 21482.447306331946, "count": 335846, "is_parallel": true, "self": 71.8631478776224, "children": { "UnityEnvironment._generate_step_input": { "total": 376.6306625206489, "count": 335846, "is_parallel": true, "self": 376.6306625206489 }, "communicator.exchange": { "total": 20411.39311322989, "count": 335846, "is_parallel": true, "self": 20411.39311322989 }, "steps_from_proto": { "total": 622.5603827037849, "count": 671692, "is_parallel": true, "self": 110.04838352883235, "children": { "_process_rank_one_or_two_observation": { "total": 512.5119991749525, "count": 2686768, "is_parallel": true, "self": 512.5119991749525 } } } } } } } } } } }, "trainer_advance": { "total": 7354.89388575498, "count": 335846, "self": 45.15888055972755, "children": { "process_trajectory": { "total": 895.5033374256454, "count": 335846, "self": 892.6511384991463, "children": { "RLTrainer._checkpoint": { "total": 2.8521989264991134, "count": 10, "self": 2.8521989264991134 } } }, "_update_policy": { "total": 6414.231667769607, "count": 238, "self": 616.4563849528786, "children": { "TorchPOCAOptimizer.update": { "total": 5797.775282816729, "count": 7140, "self": 5797.775282816729 } } } } } } }, "trainer_threads": { "total": 7.83940777182579e-07, "count": 1, "self": 7.83940777182579e-07 }, "TrainerController._save_models": { "total": 0.21648122603073716, "count": 1, "self": 0.0019202318508177996, "children": { "RLTrainer._checkpoint": { "total": 0.21456099417991936, "count": 1, "self": 0.21456099417991936 } } } } } } }