{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.987900733947754, "min": 1.987900733947754, "max": 2.8546102046966553, "count": 26 }, "SoccerTwos.Policy.Entropy.sum": { "value": 397007.625, "min": 139487.671875, "max": 558627.0, "count": 26 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 59.66019417475728, "min": 49.16850551654965, "max": 91.63940520446097, "count": 26 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 196640.0, "min": 41648.0, "max": 197212.0, "count": 26 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1430.1056701675807, "min": 1258.2571707171523, "max": 1430.1056701675807, "count": 26 }, "SoccerTwos.Self-play.ELO.sum": { "value": 2356814.144436173, "min": 309531.2639964195, "max": 2751169.229663174, "count": 26 }, "SoccerTwos.Step.mean": { "value": 3999996.0, "min": 1499891.0, "max": 3999996.0, "count": 26 }, "SoccerTwos.Step.sum": { "value": 3999996.0, "min": 1499891.0, "max": 3999996.0, "count": 26 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.023623770102858543, "min": -0.04748684912919998, "max": 0.07822415977716446, "count": 26 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -38.908348083496094, "min": -78.63822174072266, "max": 119.05716705322266, "count": 26 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.023193733766674995, "min": -0.04754623770713806, "max": 0.0787821114063263, "count": 26 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -38.20008087158203, "min": -78.736572265625, "max": 119.9063720703125, "count": 26 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 26 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 26 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.04918627856207675, "min": -0.2980424484428094, "max": 0.07948849135590418, "count": 26 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -81.00980079174042, "min": -161.53160113096237, "max": 111.20439940690994, "count": 26 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.04918627856207675, "min": -0.2980424484428094, "max": 0.07948849135590418, "count": 26 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -81.00980079174042, "min": -161.53160113096237, "max": 111.20439940690994, "count": 26 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 26 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 26 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.018397579850716283, "min": 0.014280015331169123, "max": 0.01906332888213607, "count": 25 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.09198789925358142, "min": 0.06508319377339451, "max": 0.09531664441068036, "count": 25 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.1024769835670789, "min": 0.06179566949605941, "max": 0.11246717503915231, "count": 25 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.5123849178353945, "min": 0.30897834748029707, "max": 0.549869134525458, "count": 25 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10382400318980216, "min": 0.06299761322637398, "max": 0.1146553639943401, "count": 25 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.5191200159490108, "min": 0.3149880661318699, "max": 0.5587463647127151, "count": 25 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 25 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0014999999999999998, "min": 0.0012, "max": 0.0014999999999999998, "count": 25 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.2000000000000001, "min": 0.20000000000000007, "max": 0.2000000000000001, "count": 25 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 1.0000000000000004, "min": 0.8000000000000003, "max": 1.0000000000000004, "count": 25 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 25 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.025000000000000005, "min": 0.020000000000000004, "max": 0.025000000000000005, "count": 25 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1735551815", "python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]", "command_line_arguments": "/opt/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos-v2 --no-graphics --resume", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.2", "numpy_version": "1.23.5", "end_time_seconds": "1735569765" }, "total": 17950.380276207998, "count": 1, "self": 1.122823748999508, "children": { "run_training.setup": { "total": 0.03164449999999874, "count": 1, "self": 0.03164449999999874 }, "TrainerController.start_learning": { "total": 17949.225807959, "count": 1, "self": 2.9352819302912394, "children": { "TrainerController._reset_env": { "total": 5.844047995996178, "count": 27, "self": 5.844047995996178 }, "TrainerController.advance": { "total": 17940.25007365671, "count": 175532, "self": 2.7704436200801865, "children": { "env_step": { "total": 13786.285465815437, "count": 175532, "self": 13330.531572823995, "children": { "SubprocessEnvManager._take_step": { "total": 453.779812337664, "count": 175532, "self": 14.00926687950971, "children": { "TorchPolicy.evaluate": { "total": 439.7705454581543, "count": 317738, "self": 439.7705454581543 } } }, "workers": { "total": 1.9740806537791826, "count": 175532, "self": 0.0, "children": { "worker_root": { "total": 17940.071711200984, "count": 175532, "is_parallel": true, "self": 4938.5274700827085, "children": { "steps_from_proto": { "total": 0.07457846099593723, "count": 54, "is_parallel": true, "self": 0.009313241989218568, "children": { "_process_rank_one_or_two_observation": { "total": 0.06526521900671867, "count": 216, "is_parallel": true, "self": 0.06526521900671867 } } }, "UnityEnvironment.step": { "total": 13001.46966265728, "count": 175532, "is_parallel": true, "self": 37.82651737203014, "children": { "UnityEnvironment._generate_step_input": { "total": 224.72221277368357, "count": 175532, "is_parallel": true, "self": 224.72221277368357 }, "communicator.exchange": { "total": 12282.636984814073, "count": 175532, "is_parallel": true, "self": 12282.636984814073 }, "steps_from_proto": { "total": 456.28394769749264, "count": 351064, "is_parallel": true, "self": 52.513873125187786, "children": { "_process_rank_one_or_two_observation": { "total": 403.77007457230485, "count": 1404256, "is_parallel": true, "self": 403.77007457230485 } } } } } } } } } } }, "trainer_advance": { "total": 4151.1941642211905, "count": 175532, "self": 22.755258091840915, "children": { "process_trajectory": { "total": 670.41246533036, "count": 175532, "self": 669.196809913361, "children": { "RLTrainer._checkpoint": { "total": 1.2156554169990272, "count": 6, "self": 1.2156554169990272 } } }, "_update_policy": { "total": 3458.02644079899, "count": 122, "self": 330.51740095700006, "children": { "TorchPOCAOptimizer.update": { "total": 3127.5090398419898, "count": 3660, "self": 3127.5090398419898 } } } } } } }, "trainer_threads": { "total": 7.920025382190943e-07, "count": 1, "self": 7.920025382190943e-07 }, "TrainerController._save_models": { "total": 0.19640358399919933, "count": 1, "self": 0.005251626000244869, "children": { "RLTrainer._checkpoint": { "total": 0.19115195799895446, "count": 1, "self": 0.19115195799895446 } } } } } } }