{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.9864026308059692, "min": 1.9082093238830566, "max": 3.2957332134246826, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 40300.13671875, "min": 26879.552734375, "max": 105463.4453125, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 61.175, "min": 41.857142857142854, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19576.0, "min": 12956.0, "max": 27200.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1605.358587035444, "min": 1197.4853284245494, "max": 1614.822387948391, "count": 490 }, "SoccerTwos.Self-play.ELO.sum": { "value": 256857.37392567107, "min": 2408.4811984478456, "max": 357555.93882403144, "count": 490 }, "SoccerTwos.Step.mean": { "value": 4999977.0, "min": 9622.0, "max": 4999977.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999977.0, "min": 9622.0, "max": 4999977.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.016187205910682678, "min": -0.09651190787553787, "max": 0.1405455321073532, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -2.60614013671875, "min": -14.380273818969727, "max": 23.486591339111328, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.018162962049245834, "min": -0.09739697724580765, "max": 0.15109671652317047, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -2.924237012863159, "min": -15.142280578613281, "max": 24.4108943939209, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.01075031002115759, "min": -0.5, "max": 0.4255837862556045, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 1.730799913406372, "min": -63.85000002384186, "max": 68.37259984016418, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.01075031002115759, "min": -0.5, "max": 0.4255837862556045, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 1.730799913406372, "min": -63.85000002384186, "max": 68.37259984016418, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.02015747803573807, "min": 0.010948155978015468, "max": 0.02407018078180651, "count": 241 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.02015747803573807, "min": 0.010948155978015468, "max": 0.02407018078180651, "count": 241 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10361638193329176, "min": 2.2083852400101023e-05, "max": 0.1238990381360054, "count": 241 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10361638193329176, "min": 2.2083852400101023e-05, "max": 0.1238990381360054, "count": 241 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10498877962430318, "min": 2.2387529164310156e-05, "max": 0.12585748732089996, "count": 241 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10498877962430318, "min": 2.2387529164310156e-05, "max": 0.12585748732089996, "count": 241 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 241 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 241 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 241 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 241 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 241 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 241 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1709313144", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Alberto\\Desktop\\Kelvins\\RL\\.rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1709332931" }, "total": 19786.673720299965, "count": 1, "self": 0.5133954999037087, "children": { "run_training.setup": { "total": 0.1658504999941215, "count": 1, "self": 0.1658504999941215 }, "TrainerController.start_learning": { "total": 19785.994474300067, "count": 1, "self": 11.747585355071351, "children": { "TrainerController._reset_env": { "total": 9.085932599497028, "count": 25, "self": 9.085932599497028 }, "TrainerController.advance": { "total": 19764.953639245476, "count": 341903, "self": 11.481712852255441, "children": { "env_step": { "total": 8428.533072478487, "count": 341903, "self": 6568.507706379984, "children": { "SubprocessEnvManager._take_step": { "total": 1852.474334913306, "count": 341903, "self": 62.0785688519245, "children": { "TorchPolicy.evaluate": { "total": 1790.3957660613814, "count": 629820, "self": 1790.3957660613814 } } }, "workers": { "total": 7.551031185197644, "count": 341903, "self": 0.0, "children": { "worker_root": { "total": 19764.28930737602, "count": 341903, "is_parallel": true, "self": 14579.895800008206, "children": { "steps_from_proto": { "total": 0.05217019992414862, "count": 50, "is_parallel": true, "self": 0.01144379936158657, "children": { "_process_rank_one_or_two_observation": { "total": 0.04072640056256205, "count": 200, "is_parallel": true, "self": 0.04072640056256205 } } }, "UnityEnvironment.step": { "total": 5184.341337167891, "count": 341903, "is_parallel": true, "self": 234.4080952522345, "children": { "UnityEnvironment._generate_step_input": { "total": 229.56586831097957, "count": 341903, "is_parallel": true, "self": 229.56586831097957 }, "communicator.exchange": { "total": 3919.5055778997485, "count": 341903, "is_parallel": true, "self": 3919.5055778997485 }, "steps_from_proto": { "total": 800.8617957049282, "count": 683806, "is_parallel": true, "self": 170.4639789017383, "children": { "_process_rank_one_or_two_observation": { "total": 630.3978168031899, "count": 2735224, "is_parallel": true, "self": 630.3978168031899 } } } } } } } } } } }, "trainer_advance": { "total": 11324.938853914733, "count": 341903, "self": 82.37284525635187, "children": { "process_trajectory": { "total": 2123.242219958571, "count": 341903, "self": 2121.250825158437, "children": { "RLTrainer._checkpoint": { "total": 1.9913948001340032, "count": 10, "self": 1.9913948001340032 } } }, "_update_policy": { "total": 9119.32378869981, "count": 241, "self": 907.658305199584, "children": { "TorchPOCAOptimizer.update": { "total": 8211.665483500226, "count": 7233, "self": 8211.665483500226 } } } } } } }, "trainer_threads": { "total": 2.100015990436077e-06, "count": 1, "self": 2.100015990436077e-06 }, "TrainerController._save_models": { "total": 0.20731500000692904, "count": 1, "self": 0.0038772999541833997, "children": { "RLTrainer._checkpoint": { "total": 0.20343770005274564, "count": 1, "self": 0.20343770005274564 } } } } } } }