{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 0.7599584460258484,
"min": 0.7599584460258484,
"max": 1.1046671867370605,
"count": 1360
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 14688.4765625,
"min": 13102.9404296875,
"max": 24072.90625,
"count": 1360
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 52.329787234042556,
"min": 44.76146788990825,
"max": 100.91836734693878,
"count": 1360
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19676.0,
"min": 17292.0,
"max": 21412.0,
"count": 1360
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 2007.2244566922054,
"min": 1928.0461427939483,
"max": 2021.3323120366388,
"count": 1360
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 377358.19785813463,
"min": 192663.0863525974,
"max": 427977.8272378125,
"count": 1360
},
"SoccerTwos.Step.mean": {
"value": 69999996.0,
"min": 56409899.0,
"max": 69999996.0,
"count": 1360
},
"SoccerTwos.Step.sum": {
"value": 69999996.0,
"min": 56409899.0,
"max": 69999996.0,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.0012385876616463065,
"min": -0.11361774057149887,
"max": 0.057988058775663376,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.23161590099334717,
"min": -17.383514404296875,
"max": 8.756196975708008,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0018490327056497335,
"min": -0.11377845704555511,
"max": 0.05853987857699394,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.34576910734176636,
"min": -17.408103942871094,
"max": 8.839521408081055,
"count": 1360
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1360
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.028980749813630618,
"min": -0.3808833339384624,
"max": 0.33065945554423976,
"count": 1360
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 5.419400215148926,
"min": -63.98840010166168,
"max": 54.22040015459061,
"count": 1360
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.028980749813630618,
"min": -0.3808833339384624,
"max": 0.33065945554423976,
"count": 1360
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 5.419400215148926,
"min": -63.98840010166168,
"max": 54.22040015459061,
"count": 1360
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1360
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1360
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.015010278671979904,
"min": 0.010116659845031488,
"max": 0.02512193746709575,
"count": 659
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.015010278671979904,
"min": 0.010116659845031488,
"max": 0.02512193746709575,
"count": 659
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10901169503728549,
"min": 0.08509302685658136,
"max": 0.1227603184680144,
"count": 659
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10901169503728549,
"min": 0.08509302685658136,
"max": 0.1227603184680144,
"count": 659
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10926606232921282,
"min": 0.08495384007692337,
"max": 0.12262502461671829,
"count": 659
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10926606232921282,
"min": 0.08495384007692337,
"max": 0.12262502461671829,
"count": 659
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 5.9294265982841634e-08,
"min": 5.9294265982841634e-08,
"max": 5.8191972031274264e-05,
"count": 659
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 5.9294265982841634e-08,
"min": 5.9294265982841634e-08,
"max": 5.8191972031274264e-05,
"count": 659
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10001973142857144,
"min": 0.10001973142857144,
"max": 0.11939729714285716,
"count": 659
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10001973142857144,
"min": 0.10001973142857144,
"max": 0.11939729714285716,
"count": 659
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.0984598285714026e-05,
"min": 1.0984598285714026e-05,
"max": 0.0009779251274285715,
"count": 659
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.0984598285714026e-05,
"min": 1.0984598285714026e-05,
"max": 0.0009779251274285715,
"count": 659
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676149490",
"python_version": "3.9.16 (main, Jan 11 2023, 10:02:19) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/sasha/opt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1676229443"
},
"total": 79934.386987849,
"count": 1,
"self": 1.463982158980798,
"children": {
"run_training.setup": {
"total": 0.029795925999999806,
"count": 1,
"self": 0.029795925999999806
},
"TrainerController.start_learning": {
"total": 79932.893209764,
"count": 1,
"self": 22.659327701388975,
"children": {
"TrainerController._reset_env": {
"total": 3.753083792983583,
"count": 69,
"self": 3.753083792983583
},
"TrainerController.advance": {
"total": 79906.27999352764,
"count": 935721,
"self": 20.78227292036172,
"children": {
"env_step": {
"total": 14508.591948072974,
"count": 935721,
"self": 11784.861138080418,
"children": {
"SubprocessEnvManager._take_step": {
"total": 2710.057035800044,
"count": 935721,
"self": 108.59541547363824,
"children": {
"TorchPolicy.evaluate": {
"total": 2601.461620326406,
"count": 1707118,
"self": 2601.461620326406
}
}
},
"workers": {
"total": 13.67377419251196,
"count": 935721,
"self": 0.0,
"children": {
"worker_root": {
"total": 79890.60433427706,
"count": 935721,
"is_parallel": true,
"self": 70382.08113162765,
"children": {
"steps_from_proto": {
"total": 0.12149360603329962,
"count": 138,
"is_parallel": true,
"self": 0.02646556405556577,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.09502804197773385,
"count": 552,
"is_parallel": true,
"self": 0.09502804197773385
}
}
},
"UnityEnvironment.step": {
"total": 9508.401709043377,
"count": 935721,
"is_parallel": true,
"self": 578.9504819536596,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 372.7656325183688,
"count": 935721,
"is_parallel": true,
"self": 372.7656325183688
},
"communicator.exchange": {
"total": 6971.265072172604,
"count": 935721,
"is_parallel": true,
"self": 6971.265072172604
},
"steps_from_proto": {
"total": 1585.420522398744,
"count": 1871442,
"is_parallel": true,
"self": 350.8020063299639,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1234.61851606878,
"count": 7485768,
"is_parallel": true,
"self": 1234.61851606878
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 65376.90577253431,
"count": 935721,
"self": 173.96376637272624,
"children": {
"process_trajectory": {
"total": 4682.1723722196675,
"count": 935721,
"self": 4676.241945867688,
"children": {
"RLTrainer._checkpoint": {
"total": 5.930426351980145,
"count": 28,
"self": 5.930426351980145
}
}
},
"_update_policy": {
"total": 60520.76963394191,
"count": 659,
"self": 2212.03285398805,
"children": {
"TorchPOCAOptimizer.update": {
"total": 58308.73677995386,
"count": 19770,
"self": 58308.73677995386
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.040078682824969e-07,
"count": 1,
"self": 8.040078682824969e-07
},
"TrainerController._save_models": {
"total": 0.2008039379870752,
"count": 1,
"self": 0.0018771709874272346,
"children": {
"RLTrainer._checkpoint": {
"total": 0.19892676699964795,
"count": 1,
"self": 0.19892676699964795
}
}
}
}
}
}
}
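
For working with this file programmatically, here is a minimal Python sketch that loads the JSON above and prints the training gauges followed by the timer tree. The filename "timers.json" is an assumption (ML-Agents writes this data under the run's run_logs directory), and the walk helper is illustrative, not part of the ML-Agents API.

import json

# Assumption: the JSON above has been saved locally as "timers.json".
with open("timers.json") as f:
    root = json.load(f)

# Each gauge records the latest value plus min/max over `count` updates.
for name, g in sorted(root["gauges"].items()):
    print(f"{name}: value={g['value']:.4g} "
          f"(min={g['min']:.4g}, max={g['max']:.4g}, n={g['count']})")

def walk(node, name="root", depth=0):
    """Recursively print the timer tree with per-node totals in seconds."""
    print(f"{'  ' * depth}{name}: {node['total']:.1f}s over {node['count']} call(s)")
    for child_name, child in node.get("children", {}).items():
        walk(child, child_name, depth + 1)

walk(root)

Run against this log, the gauge loop surfaces the headline numbers (e.g. SoccerTwos.Self-play.ELO.mean around 2007 at 70M steps), and the tree printout shows that TorchPOCAOptimizer.update dominates wall-clock time (~58,309s of the ~79,934s total).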