{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.6419099569320679,
"min": 1.6419099569320679,
"max": 3.295757293701172,
"count": 583
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 30841.63671875,
"min": 12586.9140625,
"max": 105464.234375,
"count": 583
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 47.84313725490196,
"min": 38.13953488372093,
"max": 999.0,
"count": 583
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19520.0,
"min": 14620.0,
"max": 27144.0,
"count": 583
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1589.8209476614618,
"min": 1195.0311493738434,
"max": 1624.212424863242,
"count": 578
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 324323.4733229382,
"min": 2391.5608596359775,
"max": 371390.8554079746,
"count": 578
},
"SoccerTwos.Step.mean": {
"value": 5829938.0,
"min": 9588.0,
"max": 5829938.0,
"count": 583
},
"SoccerTwos.Step.sum": {
"value": 5829938.0,
"min": 9588.0,
"max": 5829938.0,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.031814150512218475,
"min": -0.12275109440088272,
"max": 0.16648373007774353,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -6.5219011306762695,
"min": -24.427467346191406,
"max": 24.573871612548828,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03891433775424957,
"min": -0.12549638748168945,
"max": 0.16274768114089966,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -7.977438926696777,
"min": -24.97378158569336,
"max": 25.92108154296875,
"count": 583
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 583
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.11726341596463831,
"min": -0.7142857142857143,
"max": 0.5206204090191393,
"count": 583
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -24.039000272750854,
"min": -62.41359996795654,
"max": 65.96000003814697,
"count": 583
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.11726341596463831,
"min": -0.7142857142857143,
"max": 0.5206204090191393,
"count": 583
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -24.039000272750854,
"min": -62.41359996795654,
"max": 65.96000003814697,
"count": 583
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 583
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 583
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.015221605898113921,
"min": 0.01092262822912744,
"max": 0.023748412328617026,
"count": 280
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.015221605898113921,
"min": 0.01092262822912744,
"max": 0.023748412328617026,
"count": 280
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.11137958690524101,
"min": 0.0008556886556713531,
"max": 0.12397084087133407,
"count": 280
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.11137958690524101,
"min": 0.0008556886556713531,
"max": 0.12397084087133407,
"count": 280
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.11331250021855037,
"min": 0.0008961730782175437,
"max": 0.12599406515558562,
"count": 280
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.11331250021855037,
"min": 0.0008961730782175437,
"max": 0.12599406515558562,
"count": 280
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 280
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 280
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 280
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 280
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 280
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 280
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1701285198",
"python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "\\\\?\\C:\\Users\\user\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.1+cpu",
"numpy_version": "1.23.5",
"end_time_seconds": "1701333182"
},
"total": 47982.489849000005,
"count": 1,
"self": 0.24068759998772293,
"children": {
"run_training.setup": {
"total": 0.12456259998725727,
"count": 1,
"self": 0.12456259998725727
},
"TrainerController.start_learning": {
"total": 47982.12459880003,
"count": 1,
"self": 27.5780346042593,
"children": {
"TrainerController._reset_env": {
"total": 9.155859399819747,
"count": 30,
"self": 9.155859399819747
},
"TrainerController.advance": {
"total": 47944.85520439595,
"count": 399099,
"self": 29.803272486140486,
"children": {
"env_step": {
"total": 20304.866591629398,
"count": 399099,
"self": 14576.87355039746,
"children": {
"SubprocessEnvManager._take_step": {
"total": 5712.371078997327,
"count": 399099,
"self": 165.52294571546372,
"children": {
"TorchPolicy.evaluate": {
"total": 5546.848133281863,
"count": 735372,
"self": 5546.848133281863
}
}
},
"workers": {
"total": 15.621962234610692,
"count": 399099,
"self": 0.0,
"children": {
"worker_root": {
"total": 47936.45714417251,
"count": 399099,
"is_parallel": true,
"self": 36596.77760363015,
"children": {
"steps_from_proto": {
"total": 0.15410140017047524,
"count": 60,
"is_parallel": true,
"self": 0.03087789920391515,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.1232235009665601,
"count": 240,
"is_parallel": true,
"self": 0.1232235009665601
}
}
},
"UnityEnvironment.step": {
"total": 11339.525439142191,
"count": 399099,
"is_parallel": true,
"self": 585.7096071103006,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 507.8765655910829,
"count": 399099,
"is_parallel": true,
"self": 507.8765655910829
},
"communicator.exchange": {
"total": 8226.763018726895,
"count": 399099,
"is_parallel": true,
"self": 8226.763018726895
},
"steps_from_proto": {
"total": 2019.1762477139127,
"count": 798198,
"is_parallel": true,
"self": 389.7491419397411,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1629.4271057741717,
"count": 3192792,
"is_parallel": true,
"self": 1629.4271057741717
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 27610.18534028041,
"count": 399099,
"self": 161.50414524361258,
"children": {
"process_trajectory": {
"total": 5946.397217437276,
"count": 399099,
"self": 5941.642507737386,
"children": {
"RLTrainer._checkpoint": {
"total": 4.754709699889645,
"count": 11,
"self": 4.754709699889645
}
}
},
"_update_policy": {
"total": 21502.283977599523,
"count": 281,
"self": 2456.635687999951,
"children": {
"TorchPOCAOptimizer.update": {
"total": 19045.648289599572,
"count": 8403,
"self": 19045.648289599572
}
}
}
}
}
}
},
"trainer_threads": {
"total": 3.8000289350748062e-06,
"count": 1,
"self": 3.8000289350748062e-06
},
"TrainerController._save_models": {
"total": 0.5354965999722481,
"count": 1,
"self": 0.007501599960960448,
"children": {
"RLTrainer._checkpoint": {
"total": 0.5279950000112876,
"count": 1,
"self": 0.5279950000112876
}
}
}
}
}
}
}