Update_agent

c56af51 over 1 year ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 2.4647605419158936,
	"min": 2.4436533451080322,
	"max": 2.6065003871917725,
	"count": 75
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 49452.95703125,
	"min": 42656.33203125,
	"max": 57644.62109375,
	"count": 75
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 49.34020618556701,
	"min": 43.92727272727273,
	"max": 86.55,
	"count": 75
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19144.0,
	"min": 18468.0,
	"max": 20828.0,
	"count": 75
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1446.3990784066912,
	"min": 1407.1527249916471,
	"max": 1466.4331764449346,
	"count": 75
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 280601.4212108981,
	"min": 170986.03853388596,
	"max": 318622.23212539963,
	"count": 75
	},
	"SoccerTwos.Step.mean": {
	"value": 6499934.0,
	"min": 5759934.0,
	"max": 6499934.0,
	"count": 75
	},
	"SoccerTwos.Step.sum": {
	"value": 6499934.0,
	"min": 5759934.0,
	"max": 6499934.0,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": 0.06069766357541084,
	"min": -0.028229886665940285,
	"max": 0.08104612678289413,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": 11.836044311523438,
	"min": -4.190831184387207,
	"max": 13.453786849975586,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.05713004246354103,
	"min": -0.03292163461446762,
	"max": 0.07865509390830994,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": 11.140357971191406,
	"min": -4.719839096069336,
	"max": 13.713396072387695,
	"count": 75
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 75
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": 0.15984615301474547,
	"min": -0.2143614891893375,
	"max": 0.23290133158365886,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": 31.169999837875366,
	"min": -34.51219975948334,
	"max": 34.93519973754883,
	"count": 75
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": 0.15984615301474547,
	"min": -0.2143614891893375,
	"max": 0.23290133158365886,
	"count": 75
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": 31.169999837875366,
	"min": -34.51219975948334,
	"max": 34.93519973754883,
	"count": 75
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 75
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 75
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.014913784746204329,
	"min": 0.012705877462091545,
	"max": 0.02106438890332356,
	"count": 36
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.014913784746204329,
	"min": 0.012705877462091545,
	"max": 0.02106438890332356,
	"count": 36
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.08063839798172315,
	"min": 0.06412938882907232,
	"max": 0.08411871741215388,
	"count": 36
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.08063839798172315,
	"min": 0.06412938882907232,
	"max": 0.08411871741215388,
	"count": 36
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.08215803081790606,
	"min": 0.06557651174565156,
	"max": 0.08553109541535378,
	"count": 36
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.08215803081790606,
	"min": 0.06557651174565156,
	"max": 0.08553109541535378,
	"count": 36
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 36
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 36
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 36
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 36
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 36
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 36
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1690888271",
	"python_version": "3.10.6 (main, May 29 2023, 11:10:38) [GCC 11.3.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume --torch-device=cpu",
	"mlagents_version": "0.31.0.dev0",
	"mlagents_envs_version": "0.31.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.11.0+cu102",
	"numpy_version": "1.21.2",
	"end_time_seconds": "1690891406"
	},
	"total": 3134.5549977269984,
	"count": 1,
	"self": 0.5773417409982358,
	"children": {
	"run_training.setup": {
	"total": 0.041487718999633216,
	"count": 1,
	"self": 0.041487718999633216
	},
	"TrainerController.start_learning": {
	"total": 3133.9361682670005,
	"count": 1,
	"self": 2.2835163210475002,
	"children": {
	"TrainerController._reset_env": {
	"total": 1.2968337049987895,
	"count": 5,
	"self": 1.2968337049987895
	},
	"TrainerController.advance": {
	"total": 3130.138600297956,
	"count": 52092,
	"self": 2.239194006268008,
	"children": {
	"env_step": {
	"total": 1712.5569826390547,
	"count": 52092,
	"self": 1448.387999565768,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 262.8882053682264,
	"count": 52092,
	"self": 12.37939927491243,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 250.50880609331398,
	"count": 94230,
	"self": 250.50880609331398
	}
	}
	},
	"workers": {
	"total": 1.2807777050602454,
	"count": 52092,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 3117.428293702631,
	"count": 52092,
	"is_parallel": true,
	"self": 1911.5819695236896,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.003298657000414096,
	"count": 2,
	"is_parallel": true,
	"self": 0.0008504910019837553,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0024481659984303406,
	"count": 8,
	"is_parallel": true,
	"self": 0.0024481659984303406
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.046987857998828986,
	"count": 1,
	"is_parallel": true,
	"self": 0.001370382000459358,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0010125909993803361,
	"count": 1,
	"is_parallel": true,
	"self": 0.0010125909993803361
	},
	"communicator.exchange": {
	"total": 0.040281173000039416,
	"count": 1,
	"is_parallel": true,
	"self": 0.040281173000039416
	},
	"steps_from_proto": {
	"total": 0.004323711998949875,
	"count": 2,
	"is_parallel": true,
	"self": 0.0007712519964115927,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0035524600025382824,
	"count": 8,
	"is_parallel": true,
	"self": 0.0035524600025382824
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.011108942999271676,
	"count": 8,
	"is_parallel": true,
	"self": 0.0020862129968008958,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.00902273000247078,
	"count": 32,
	"is_parallel": true,
	"self": 0.00902273000247078
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1205.835215235942,
	"count": 52091,
	"is_parallel": true,
	"self": 71.90485385938155,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 38.48136216392413,
	"count": 52091,
	"is_parallel": true,
	"self": 38.48136216392413
	},
	"communicator.exchange": {
	"total": 868.8257394927841,
	"count": 52091,
	"is_parallel": true,
	"self": 868.8257394927841
	},
	"steps_from_proto": {
	"total": 226.62325971985229,
	"count": 104182,
	"is_parallel": true,
	"self": 41.60843442960868,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 185.0148252902436,
	"count": 416728,
	"is_parallel": true,
	"self": 185.0148252902436
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 1415.3424236526334,
	"count": 52092,
	"self": 15.513245858084701,
	"children": {
	"process_trajectory": {
	"total": 282.79064784255206,
	"count": 52092,
	"self": 282.21467366855177,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.5759741740002937,
	"count": 2,
	"self": 0.5759741740002937
	}
	}
	},
	"_update_policy": {
	"total": 1117.0385299519967,
	"count": 36,
	"self": 198.22306532695438,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 918.8154646250423,
	"count": 1080,
	"self": 918.8154646250423
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.1209995136596262e-06,
	"count": 1,
	"self": 1.1209995136596262e-06
	},
	"TrainerController._save_models": {
	"total": 0.2172168219985906,
	"count": 1,
	"self": 0.0020243499966454692,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.21519247200194513,
	"count": 1,
	"self": 0.21519247200194513
	}
	}
	}
	}
	}
	}
	}