Update_agent

b2b7ee2 over 1 year ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 2.9653992652893066,
	"min": 2.928091049194336,
	"max": 3.089735984802246,
	"count": 75
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 55322.48828125,
	"min": 30992.189453125,
	"max": 94720.8671875,
	"count": 75
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 258.5263157894737,
	"min": 126.6923076923077,
	"max": 539.2222222222222,
	"count": 75
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19648.0,
	"min": 16400.0,
	"max": 23072.0,
	"count": 75
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1288.917142349652,
	"min": 1265.364925775114,
	"max": 1319.9948589635467,
	"count": 75
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 46401.01712458747,
	"min": 17796.887486172378,
	"max": 99141.74676083495,
	"count": 75
	},
	"SoccerTwos.Step.mean": {
	"value": 4249607.0,
	"min": 3509874.0,
	"max": 4249607.0,
	"count": 75
	},
	"SoccerTwos.Step.sum": {
	"value": 4249607.0,
	"min": 3509874.0,
	"max": 4249607.0,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": 0.033912427723407745,
	"min": -0.02116245962679386,
	"max": 0.08301462978124619,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": 1.322584629058838,
	"min": -1.5871844291687012,
	"max": 4.439652919769287,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.035285141319036484,
	"min": -0.031118163838982582,
	"max": 0.0905672013759613,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": 1.3761205673217773,
	"min": -2.3338623046875,
	"max": 5.524599075317383,
	"count": 75
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 75
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.03541025595787244,
	"min": -0.5060222225470675,
	"max": 0.3610142873866217,
	"count": 75
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -1.3809999823570251,
	"min": -19.56760013103485,
	"max": 17.24120005965233,
	"count": 75
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.03541025595787244,
	"min": -0.5060222225470675,
	"max": 0.3610142873866217,
	"count": 75
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -1.3809999823570251,
	"min": -19.56760013103485,
	"max": 17.24120005965233,
	"count": 75
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 75
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 75
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.016938292739602425,
	"min": 0.010926229831723807,
	"max": 0.02053702191139261,
	"count": 35
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.016938292739602425,
	"min": 0.010926229831723807,
	"max": 0.02053702191139261,
	"count": 35
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.026586440950632097,
	"min": 0.012621084662775199,
	"max": 0.03861811061700185,
	"count": 35
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.026586440950632097,
	"min": 0.012621084662775199,
	"max": 0.03861811061700185,
	"count": 35
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.027341894370814166,
	"min": 0.012977093706528347,
	"max": 0.03994991021851699,
	"count": 35
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.027341894370814166,
	"min": 0.012977093706528347,
	"max": 0.03994991021851699,
	"count": 35
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 35
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 35
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 35
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 35
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 35
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 35
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1690878657",
	"python_version": "3.10.6 (main, May 29 2023, 11:10:38) [GCC 11.3.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume --torch-device=cpu",
	"mlagents_version": "0.31.0.dev0",
	"mlagents_envs_version": "0.31.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.11.0+cu102",
	"numpy_version": "1.21.2",
	"end_time_seconds": "1690881716"
	},
	"total": 3059.7732766870004,
	"count": 1,
	"self": 1.0997174930007532,
	"children": {
	"run_training.setup": {
	"total": 0.04625938999970458,
	"count": 1,
	"self": 0.04625938999970458
	},
	"TrainerController.start_learning": {
	"total": 3058.627299804,
	"count": 1,
	"self": 1.995636533916695,
	"children": {
	"TrainerController._reset_env": {
	"total": 1.7464096760004395,
	"count": 5,
	"self": 1.7464096760004395
	},
	"TrainerController.advance": {
	"total": 3054.504729847082,
	"count": 49247,
	"self": 2.1494134940912772,
	"children": {
	"env_step": {
	"total": 1728.580282753981,
	"count": 49247,
	"self": 1451.0359896730588,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 276.30703805185794,
	"count": 49247,
	"self": 14.137087277108094,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 262.16995077474985,
	"count": 95670,
	"self": 262.16995077474985
	}
	}
	},
	"workers": {
	"total": 1.237255029064272,
	"count": 49247,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 3040.5957244619613,
	"count": 49247,
	"is_parallel": true,
	"self": 1839.4196168309827,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.004600864000167348,
	"count": 2,
	"is_parallel": true,
	"self": 0.0013356340009522683,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.00326522999921508,
	"count": 8,
	"is_parallel": true,
	"self": 0.00326522999921508
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.05661161500029266,
	"count": 1,
	"is_parallel": true,
	"self": 0.0014208450006663043,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0015787909997015959,
	"count": 1,
	"is_parallel": true,
	"self": 0.0015787909997015959
	},
	"communicator.exchange": {
	"total": 0.04902488400011862,
	"count": 1,
	"is_parallel": true,
	"self": 0.04902488400011862
	},
	"steps_from_proto": {
	"total": 0.0045870949998061405,
	"count": 2,
	"is_parallel": true,
	"self": 0.0008254869999291259,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0037616079998770147,
	"count": 8,
	"is_parallel": true,
	"self": 0.0037616079998770147
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.012616636001439474,
	"count": 8,
	"is_parallel": true,
	"self": 0.0025250890007555427,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.010091547000683931,
	"count": 32,
	"is_parallel": true,
	"self": 0.010091547000683931
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1201.163490994977,
	"count": 49246,
	"is_parallel": true,
	"self": 71.97562404471091,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 37.76953575507514,
	"count": 49246,
	"is_parallel": true,
	"self": 37.76953575507514
	},
	"communicator.exchange": {
	"total": 862.6202264811259,
	"count": 49246,
	"is_parallel": true,
	"self": 862.6202264811259
	},
	"steps_from_proto": {
	"total": 228.79810471406518,
	"count": 98492,
	"is_parallel": true,
	"self": 41.2123752680468,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 187.58572944601838,
	"count": 393968,
	"is_parallel": true,
	"self": 187.58572944601838
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 1323.77503359901,
	"count": 49247,
	"self": 16.65251782910582,
	"children": {
	"process_trajectory": {
	"total": 211.0241680079057,
	"count": 49247,
	"self": 210.79081530890608,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.2333526989996244,
	"count": 1,
	"self": 0.2333526989996244
	}
	}
	},
	"_update_policy": {
	"total": 1096.0983477619984,
	"count": 35,
	"self": 203.32014813700744,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 892.778199624991,
	"count": 1050,
	"self": 892.778199624991
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 2.0180004867142998e-06,
	"count": 1,
	"self": 2.0180004867142998e-06
	},
	"TrainerController._save_models": {
	"total": 0.38052172900006553,
	"count": 1,
	"self": 0.0025554750000083004,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.37796625400005723,
	"count": 1,
	"self": 0.37796625400005723
	}
	}
	}
	}
	}
	}
	}