{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.6786307096481323, "min": 1.5745259523391724, "max": 3.2957308292388916, "count": 1032 }, "SoccerTwos.Policy.Entropy.sum": { "value": 33250.31640625, "min": 21746.2734375, "max": 108114.2265625, "count": 1032 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 83.49152542372882, "min": 42.67857142857143, "max": 999.0, "count": 1032 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19704.0, "min": 11480.0, "max": 28624.0, "count": 1032 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1604.8054995841587, "min": 1200.3531830769516, "max": 1640.7670855763163, "count": 1007 }, "SoccerTwos.Self-play.ELO.sum": { "value": 189367.04895093074, "min": 2401.8571756034985, "max": 361101.02181843895, "count": 1007 }, "SoccerTwos.Step.mean": { "value": 10319889.0, "min": 9328.0, "max": 10319889.0, "count": 1032 }, "SoccerTwos.Step.sum": { "value": 10319889.0, "min": 9328.0, "max": 10319889.0, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.055513154715299606, "min": -0.1528235226869583, "max": 0.15979550778865814, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -6.5505523681640625, "min": -23.687644958496094, "max": 24.916748046875, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.05225207284092903, "min": -0.15386952459812164, "max": 0.16231504082679749, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -6.165744781494141, "min": -23.849777221679688, "max": 24.937114715576172, "count": 1032 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1032 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.10630339030492103, "min": -0.5, "max": 0.512679997086525, "count": 1032 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -12.543800055980682, "min": -64.06279993057251, "max": 50.80039978027344, "count": 1032 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.10630339030492103, "min": -0.5, "max": 0.512679997086525, "count": 1032 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -12.543800055980682, "min": -64.06279993057251, "max": 50.80039978027344, "count": 1032 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1032 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1032 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01641956247622147, "min": 0.010354276425762995, "max": 0.023882272428212066, "count": 497 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01641956247622147, "min": 0.010354276425762995, "max": 0.023882272428212066, "count": 497 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.07268681501348813, "min": 4.334058007771091e-06, "max": 0.12139192024866739, "count": 497 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.07268681501348813, "min": 4.334058007771091e-06, "max": 0.12139192024866739, "count": 497 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.07428343072533608, "min": 4.462567312657484e-06, "max": 0.1233118236064911, "count": 497 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.07428343072533608, "min": 4.462567312657484e-06, "max": 0.1233118236064911, "count": 497 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 497 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 497 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 497 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 497 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 497 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 497 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1716494591", "python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]", "command_line_arguments": "/Users/alec/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.2", "numpy_version": "1.23.5", "end_time_seconds": "1716557232" }, "total": 62641.04866970796, "count": 1, "self": 1.3235825409647077, "children": { "run_training.setup": { "total": 0.03111341712065041, "count": 1, "self": 0.03111341712065041 }, "TrainerController.start_learning": { "total": 62639.693973749876, "count": 1, "self": 14.336301096482202, "children": { "TrainerController._reset_env": { "total": 13.326263707596809, "count": 52, "self": 13.326263707596809 }, "TrainerController.advance": { "total": 62611.844168404816, "count": 702218, "self": 13.360000990331173, "children": { "env_step": { "total": 47977.580975174904, "count": 702218, "self": 45608.49260571436, "children": { "SubprocessEnvManager._take_step": { "total": 2357.7903887522407, "count": 702218, "self": 72.91520945960656, "children": { "TorchPolicy.evaluate": { "total": 2284.875179292634, "count": 1301272, "self": 2284.875179292634 } } }, "workers": { "total": 11.29798070830293, "count": 702218, "self": 0.0, "children": { "worker_root": { "total": 62588.38480353798, "count": 702218, "is_parallel": true, "self": 18699.627898640232, "children": { "steps_from_proto": { "total": 0.12047903845086694, "count": 104, "is_parallel": true, "self": 0.0172460675239563, "children": { "_process_rank_one_or_two_observation": { "total": 0.10323297092691064, "count": 416, "is_parallel": true, "self": 0.10323297092691064 } } }, "UnityEnvironment.step": { "total": 43888.636425859295, "count": 702218, "is_parallel": true, "self": 140.80379699892364, "children": { "UnityEnvironment._generate_step_input": { "total": 774.1627261883114, "count": 702218, "is_parallel": true, "self": 774.1627261883114 }, "communicator.exchange": { "total": 41407.156340755755, "count": 702218, "is_parallel": true, "self": 41407.156340755755 }, "steps_from_proto": { "total": 1566.5135619163048, "count": 1404436, "is_parallel": true, "self": 205.59177383757196, "children": { "_process_rank_one_or_two_observation": { "total": 1360.9217880787328, "count": 5617744, "is_parallel": true, "self": 1360.9217880787328 } } } } } } } } } } }, "trainer_advance": { "total": 14620.90319223958, "count": 702218, "self": 186.9079107141588, "children": { "process_trajectory": { "total": 2657.4464308621828, "count": 702218, "self": 2650.6117152359802, "children": { "RLTrainer._checkpoint": { "total": 6.834715626202524, "count": 20, "self": 6.834715626202524 } } }, "_update_policy": { "total": 11776.54885066324, "count": 498, "self": 1124.4483864412177, "children": { "TorchPOCAOptimizer.update": { "total": 10652.100464222021, "count": 14935, "self": 10652.100464222021 } } } } } } }, "trainer_threads": { "total": 1.00000761449337e-06, "count": 1, "self": 1.00000761449337e-06 }, "TrainerController._save_models": { "total": 0.18723954097367823, "count": 1, "self": 0.005996040999889374, "children": { "RLTrainer._checkpoint": { "total": 0.18124349997378886, "count": 1, "self": 0.18124349997378886 } } } } } } }