Update_agent

1b0c326 over 1 year ago

20.1 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 2.6478638648986816,
	"min": 2.64497971534729,
	"max": 2.910040855407715,
	"count": 99
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 56939.6640625,
	"min": 39565.4296875,
	"max": 75923.484375,
	"count": 99
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 64.56756756756756,
	"min": 58.23809523809524,
	"max": 241.95,
	"count": 99
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19112.0,
	"min": 13404.0,
	"max": 22252.0,
	"count": 99
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1412.0522756934085,
	"min": 1323.6065140054525,
	"max": 1425.1974969013027,
	"count": 99
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 208983.73680262448,
	"min": 53079.97955308974,
	"max": 238000.19807624962,
	"count": 99
	},
	"SoccerTwos.Step.mean": {
	"value": 5749994.0,
	"min": 4769938.0,
	"max": 5749994.0,
	"count": 99
	},
	"SoccerTwos.Step.sum": {
	"value": 5749994.0,
	"min": 4769938.0,
	"max": 5749994.0,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": 0.028790147975087166,
	"min": -0.04101934656500816,
	"max": 0.09027662128210068,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": 4.260941982269287,
	"min": -5.045379638671875,
	"max": 11.24030876159668,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.032357506453990936,
	"min": -0.03689615800976753,
	"max": 0.08805786818265915,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": 4.788910865783691,
	"min": -4.538227558135986,
	"max": 10.536680221557617,
	"count": 99
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 99
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": 0.09212432117075534,
	"min": -0.3625368428857703,
	"max": 0.39829629714842196,
	"count": 99
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": 13.63439953327179,
	"min": -33.321199893951416,
	"max": 43.01600009202957,
	"count": 99
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": 0.09212432117075534,
	"min": -0.3625368428857703,
	"max": 0.39829629714842196,
	"count": 99
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": 13.63439953327179,
	"min": -33.321199893951416,
	"max": 43.01600009202957,
	"count": 99
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 99
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 99
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.015930675513421495,
	"min": 0.011518537097920974,
	"max": 0.02257696205178945,
	"count": 47
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.015930675513421495,
	"min": 0.011518537097920974,
	"max": 0.02257696205178945,
	"count": 47
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.06051712979873022,
	"min": 0.03329648443808158,
	"max": 0.06932095438241959,
	"count": 47
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.06051712979873022,
	"min": 0.03329648443808158,
	"max": 0.06932095438241959,
	"count": 47
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.0617528443535169,
	"min": 0.03377765975892544,
	"max": 0.07101012269655864,
	"count": 47
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.0617528443535169,
	"min": 0.03377765975892544,
	"max": 0.07101012269655864,
	"count": 47
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 47
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 47
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 47
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 47
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 47
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 47
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1690883855",
	"python_version": "3.10.6 (main, May 29 2023, 11:10:38) [GCC 11.3.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume --torch-device=cpu",
	"mlagents_version": "0.31.0.dev0",
	"mlagents_envs_version": "0.31.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.11.0+cu102",
	"numpy_version": "1.21.2",
	"end_time_seconds": "1690887963"
	},
	"total": 4107.853062852,
	"count": 1,
	"self": 0.5270315910001955,
	"children": {
	"run_training.setup": {
	"total": 0.04542847300035646,
	"count": 1,
	"self": 0.04542847300035646
	},
	"TrainerController.start_learning": {
	"total": 4107.280602788,
	"count": 1,
	"self": 2.857072912118383,
	"children": {
	"TrainerController._reset_env": {
	"total": 1.4553741960007756,
	"count": 6,
	"self": 1.4553741960007756
	},
	"TrainerController.advance": {
	"total": 4102.65817443788,
	"count": 66248,
	"self": 2.8340780267208174,
	"children": {
	"env_step": {
	"total": 2304.179882034283,
	"count": 66248,
	"self": 1944.8308511348696,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 357.67793084941513,
	"count": 66248,
	"self": 18.838583014250617,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 338.8393478351645,
	"count": 124596,
	"self": 338.8393478351645
	}
	}
	},
	"workers": {
	"total": 1.671100049998131,
	"count": 66248,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 4084.930824363075,
	"count": 66248,
	"is_parallel": true,
	"self": 2466.000282550036,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0033345209994877223,
	"count": 2,
	"is_parallel": true,
	"self": 0.0008165080016624415,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0025180129978252808,
	"count": 8,
	"is_parallel": true,
	"self": 0.0025180129978252808
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.0472815150005772,
	"count": 1,
	"is_parallel": true,
	"self": 0.0014041190006537363,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0009399139999004547,
	"count": 1,
	"is_parallel": true,
	"self": 0.0009399139999004547
	},
	"communicator.exchange": {
	"total": 0.04066125200006354,
	"count": 1,
	"is_parallel": true,
	"self": 0.04066125200006354
	},
	"steps_from_proto": {
	"total": 0.004276229999959469,
	"count": 2,
	"is_parallel": true,
	"self": 0.000782805000199005,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.003493424999760464,
	"count": 8,
	"is_parallel": true,
	"self": 0.003493424999760464
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.016577086002143915,
	"count": 10,
	"is_parallel": true,
	"self": 0.0030401580024772556,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.01353692799966666,
	"count": 40,
	"is_parallel": true,
	"self": 0.01353692799966666
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1618.9139647270367,
	"count": 66247,
	"is_parallel": true,
	"self": 94.74951284725284,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 49.7337207289238,
	"count": 66247,
	"is_parallel": true,
	"self": 49.7337207289238
	},
	"communicator.exchange": {
	"total": 1173.826287938149,
	"count": 66247,
	"is_parallel": true,
	"self": 1173.826287938149
	},
	"steps_from_proto": {
	"total": 300.604443212711,
	"count": 132494,
	"is_parallel": true,
	"self": 54.332957335935134,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 246.27148587677584,
	"count": 529976,
	"is_parallel": true,
	"self": 246.27148587677584
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 1795.6442143768763,
	"count": 66248,
	"self": 20.65638680877055,
	"children": {
	"process_trajectory": {
	"total": 319.03219946210083,
	"count": 66248,
	"self": 318.58547976910086,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.44671969299997727,
	"count": 2,
	"self": 0.44671969299997727
	}
	}
	},
	"_update_policy": {
	"total": 1455.955628106005,
	"count": 47,
	"self": 263.7310127390356,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 1192.2246153669694,
	"count": 1410,
	"self": 1192.2246153669694
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.7330003174720332e-06,
	"count": 1,
	"self": 1.7330003174720332e-06
	},
	"TrainerController._save_models": {
	"total": 0.3099795090001862,
	"count": 1,
	"self": 0.0022393920007743873,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.30774011699941184,
	"count": 1,
	"self": 0.30774011699941184
	}
	}
	}
	}
	}
	}
	}