{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.319894552230835, "min": 1.704789638519287, "max": 3.2956809997558594, "count": 4435 }, "SoccerTwos.Policy.Entropy.sum": { "value": 45135.8671875, "min": 17959.859375, "max": 125056.6640625, "count": 4435 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 146.6764705882353, "min": 74.42424242424242, "max": 999.0, "count": 4435 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19948.0, "min": 16348.0, "max": 23700.0, "count": 4435 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1300.6487739559907, "min": 1177.7477999727355, "max": 1367.28318734369, "count": 4424 }, "SoccerTwos.Self-play.ELO.sum": { "value": 88444.11662900737, "min": 2358.987380151803, "max": 171525.63468767735, "count": 4424 }, "SoccerTwos.Step.mean": { "value": 44349946.0, "min": 9520.0, "max": 44349946.0, "count": 4435 }, "SoccerTwos.Step.sum": { "value": 44349946.0, "min": 9520.0, "max": 44349946.0, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.04153120517730713, "min": -0.15775437653064728, "max": 0.12938007712364197, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -2.782590866088867, "min": -13.886152267456055, "max": 11.155123710632324, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.041599880903959274, "min": -0.15759043395519257, "max": 0.13083183765411377, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -2.7871921062469482, "min": -14.393086433410645, "max": 11.503641128540039, "count": 4435 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 4435 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.20413731194254178, "min": -0.7483679986000061, "max": 0.5099499952048063, "count": 4435 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -13.677199900150299, "min": -49.2292001247406, "max": 32.72940009832382, "count": 4435 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.20413731194254178, "min": -0.7483679986000061, "max": 0.5099499952048063, "count": 4435 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -13.677199900150299, "min": -49.2292001247406, "max": 32.72940009832382, "count": 4435 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 4435 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 4435 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016407303039450198, "min": 0.00979746032076946, "max": 0.02626747252419591, "count": 1074 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016407303039450198, "min": 0.00979746032076946, "max": 0.02626747252419591, "count": 1074 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.08357207104563713, "min": 0.0017215843149460852, "max": 0.13385179728269578, "count": 1074 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.08357207104563713, "min": 0.0017215843149460852, "max": 0.13385179728269578, "count": 1074 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.08435913383960723, "min": 0.0017151305661536754, "max": 0.13535505294799804, "count": 1074 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.08435913383960723, "min": 0.0017151305661536754, "max": 0.13535505294799804, "count": 1074 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.003, "min": 0.003, "max": 0.003, "count": 1074 
}, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.003, "min": 0.003, "max": 0.003, "count": 1074 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.19999999999999996, "min": 0.19999999999999996, "max": 0.19999999999999996, "count": 1074 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.19999999999999996, "min": 0.19999999999999996, "max": 0.19999999999999996, "count": 1074 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005, "min": 0.005, "max": 0.005, "count": 1074 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005, "min": 0.005, "max": 0.005, "count": 1074 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1721506254", "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]", "command_line_arguments": "/home/evgenii/anaconda3/envs/hf_ai_vs_ai/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.exe --run-id=soccer_twos_new_params_2 --no-graphics --resume", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.1+cu121", "numpy_version": "1.23.5", "end_time_seconds": "1721556975" }, "total": 50721.524793640994, "count": 1, "self": 0.14538161399104865, "children": { "run_training.setup": { "total": 0.010499015999997141, "count": 1, "self": 0.010499015999997141 }, "TrainerController.start_learning": { "total": 50721.368913011, "count": 1, "self": 40.32742787305324, "children": { "TrainerController._reset_env": { "total": 10.895637416977735, "count": 443, "self": 10.895637416977735 }, "TrainerController.advance": { "total": 50670.073678110966, "count": 2957288, "self": 38.987091936673096, "children": { "env_step": { "total": 35890.15141850371, "count": 2957288, "self": 24995.95270060031, "children": { "SubprocessEnvManager._take_step": { "total": 10870.538461954828, "count": 2957288, "self": 260.20675206795386, "children": { "TorchPolicy.evaluate": { "total": 10610.331709886874, "count": 5668764, "self": 10610.331709886874 } } }, "workers": { "total": 23.660255948567254, "count": 2957288, "self": 0.0, "children": { "worker_root": { "total": 50674.83583157081, "count": 2957288, "is_parallel": true, "self": 30572.8380460856, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0016546309999938558, "count": 2, "is_parallel": true, "self": 0.0004016339999708407, "children": { "_process_rank_one_or_two_observation": { "total": 0.001252997000023015, "count": 8, "is_parallel": true, "self": 0.001252997000023015 } } }, "UnityEnvironment.step": { "total": 0.017331919000000084, "count": 1, "is_parallel": true, "self": 0.0003651530000041703, "children": { "UnityEnvironment._generate_step_input": { "total": 0.0002992849999969849, "count": 1, "is_parallel": true, "self": 0.0002992849999969849 }, "communicator.exchange": { "total": 0.015516317999995977, "count": 1, "is_parallel": true, "self": 0.015516317999995977 }, "steps_from_proto": { "total": 0.0011511630000029527, "count": 2, "is_parallel": true, "self": 0.00023563400003467905, "children": { "_process_rank_one_or_two_observation": { "total": 0.0009155289999682736, "count": 8, "is_parallel": true, "self": 0.0009155289999682736 } } } } } } }, "UnityEnvironment.step": { "total": 20101.4420077292, "count": 2957287, "is_parallel": true, "self": 1116.2354815398357, "children": { "UnityEnvironment._generate_step_input": { "total": 790.6202327370926, "count": 2957287, 
"is_parallel": true, "self": 790.6202327370926 }, "communicator.exchange": { "total": 14695.331226819619, "count": 2957287, "is_parallel": true, "self": 14695.331226819619 }, "steps_from_proto": { "total": 3499.2550666326565, "count": 5914574, "is_parallel": true, "self": 690.9443570288458, "children": { "_process_rank_one_or_two_observation": { "total": 2808.3107096038107, "count": 23658296, "is_parallel": true, "self": 2808.3107096038107 } } } } }, "steps_from_proto": { "total": 0.5557777560102863, "count": 884, "is_parallel": true, "self": 0.10960906797984649, "children": { "_process_rank_one_or_two_observation": { "total": 0.44616868803043985, "count": 3536, "is_parallel": true, "self": 0.44616868803043985 } } } } } } } } }, "trainer_advance": { "total": 14740.935167670585, "count": 2957288, "self": 321.25452261297687, "children": { "process_trajectory": { "total": 4036.5741473277353, "count": 2957288, "self": 4030.5582993397547, "children": { "RLTrainer._checkpoint": { "total": 6.015847987980351, "count": 88, "self": 6.015847987980351 } } }, "_update_policy": { "total": 10383.106497729872, "count": 1074, "self": 6584.034126425093, "children": { "TorchPOCAOptimizer.update": { "total": 3799.0723713047796, "count": 53700, "self": 3799.0723713047796 } } } } } } }, "trainer_threads": { "total": 6.639966159127653e-07, "count": 1, "self": 6.639966159127653e-07 }, "TrainerController._save_models": { "total": 0.07216894600423984, "count": 1, "self": 0.00040278300730278715, "children": { "RLTrainer._checkpoint": { "total": 0.07176616299693706, "count": 1, "self": 0.07176616299693706 } } } } } } }