{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.145423650741577,
"min": 1.8976856470108032,
"max": 3.2957332134246826,
"count": 1617
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 94614.34375,
"min": 14747.623046875,
"max": 148500.125,
"count": 1617
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 835.0,
"min": 402.0,
"max": 999.0,
"count": 1617
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 20040.0,
"min": 10652.0,
"max": 31928.0,
"count": 1617
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1180.6471335450399,
"min": 1146.2339914532424,
"max": 1198.7535925691843,
"count": 821
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 7083.882801270239,
"min": 2292.467982906485,
"max": 23809.432211595293,
"count": 821
},
"SoccerTwos.Step.mean": {
"value": 16169561.0,
"min": 9982.0,
"max": 16169561.0,
"count": 1617
},
"SoccerTwos.Step.sum": {
"value": 16169561.0,
"min": 9982.0,
"max": 16169561.0,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.0029534988570958376,
"min": -0.07831713557243347,
"max": 0.014796069823205471,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.035441987216472626,
"min": -1.0963337421417236,
"max": 0.251533180475235,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.002912085270509124,
"min": -0.07828854024410248,
"max": 0.014832121320068836,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.03494502231478691,
"min": -1.095876693725586,
"max": 0.2784641981124878,
"count": 1617
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1617
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.08931666721279423,
"min": -0.6428571428571429,
"max": 0.3695625017862767,
"count": 1617
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -1.0718000065535307,
"min": -9.821200013160706,
"max": 5.913000028580427,
"count": 1617
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.08931666721279423,
"min": -0.6428571428571429,
"max": 0.3695625017862767,
"count": 1617
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -1.0718000065535307,
"min": -9.821200013160706,
"max": 5.913000028580427,
"count": 1617
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1617
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1617
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.02138557273040836,
"min": 0.010244516965515989,
"max": 0.02537590628877903,
"count": 748
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.02138557273040836,
"min": 0.010244516965515989,
"max": 0.02537590628877903,
"count": 748
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 1.8619036211475758e-06,
"min": 1.0458391153100877e-09,
"max": 0.008836940381055076,
"count": 748
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 1.8619036211475758e-06,
"min": 1.0458391153100877e-09,
"max": 0.008836940381055076,
"count": 748
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 1.912558618262968e-06,
"min": 1.1789494972861784e-09,
"max": 0.00915419169080754,
"count": 748
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 1.912558618262968e-06,
"min": 1.1789494972861784e-09,
"max": 0.00915419169080754,
"count": 748
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 748
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 748
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.2,
"max": 0.20000000000000007,
"count": 748
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.2,
"max": 0.20000000000000007,
"count": 748
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 748
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 748
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1716968595",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/user/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1717064626"
},
"total": 94847.630679923,
"count": 1,
"self": 0.20160856099391822,
"children": {
"run_training.setup": {
"total": 0.020598568999957934,
"count": 1,
"self": 0.020598568999957934
},
"TrainerController.start_learning": {
"total": 94847.408472793,
"count": 1,
"self": 29.471138336739386,
"children": {
"TrainerController._reset_env": {
"total": 11.30946469198716,
"count": 81,
"self": 11.30946469198716
},
"TrainerController.advance": {
"total": 94806.34087721028,
"count": 1052407,
"self": 31.4315624179726,
"children": {
"env_step": {
"total": 20936.987311033437,
"count": 1052407,
"self": 15893.566943548358,
"children": {
"SubprocessEnvManager._take_step": {
"total": 5023.696336873699,
"count": 1052407,
"self": 187.9025867474311,
"children": {
"TorchPolicy.evaluate": {
"total": 4835.793750126268,
"count": 2090378,
"self": 4835.793750126268
}
}
},
"workers": {
"total": 19.72403061137993,
"count": 1052406,
"self": 0.0,
"children": {
"worker_root": {
"total": 94790.56055745442,
"count": 1052406,
"is_parallel": true,
"self": 82165.24918971091,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.003582917000130692,
"count": 2,
"is_parallel": true,
"self": 0.001412645000982593,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002170271999148099,
"count": 8,
"is_parallel": true,
"self": 0.002170271999148099
}
}
},
"UnityEnvironment.step": {
"total": 0.028860425999937434,
"count": 1,
"is_parallel": true,
"self": 0.0005274099999041937,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00045789799969497835,
"count": 1,
"is_parallel": true,
"self": 0.00045789799969497835
},
"communicator.exchange": {
"total": 0.026198995999948238,
"count": 1,
"is_parallel": true,
"self": 0.026198995999948238
},
"steps_from_proto": {
"total": 0.0016761220003900235,
"count": 2,
"is_parallel": true,
"self": 0.0003571830006876553,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013189389997023682,
"count": 8,
"is_parallel": true,
"self": 0.0013189389997023682
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 12625.131369670502,
"count": 1052405,
"is_parallel": true,
"self": 652.73928951362,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 457.4250036561489,
"count": 1052405,
"is_parallel": true,
"self": 457.4250036561489
},
"communicator.exchange": {
"total": 9403.652389351964,
"count": 1052405,
"is_parallel": true,
"self": 9403.652389351964
},
"steps_from_proto": {
"total": 2111.3146871487706,
"count": 2104810,
"is_parallel": true,
"self": 427.4679599568731,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1683.8467271918976,
"count": 8419240,
"is_parallel": true,
"self": 1683.8467271918976
}
}
}
}
},
"steps_from_proto": {
"total": 0.17999807300520843,
"count": 160,
"is_parallel": true,
"self": 0.03715480606524579,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.14284326693996263,
"count": 640,
"is_parallel": true,
"self": 0.14284326693996263
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 73837.92200375887,
"count": 1052406,
"self": 221.0813891531434,
"children": {
"process_trajectory": {
"total": 4764.590263265625,
"count": 1052406,
"self": 4756.58690631963,
"children": {
"RLTrainer._checkpoint": {
"total": 8.00335694599562,
"count": 32,
"self": 8.00335694599562
}
}
},
"_update_policy": {
"total": 68852.2503513401,
"count": 748,
"self": 2333.930492313637,
"children": {
"TorchPOCAOptimizer.update": {
"total": 66518.31985902647,
"count": 22443,
"self": 66518.31985902647
}
}
}
}
}
}
},
"trainer_threads": {
"total": 2.344997483305633e-06,
"count": 1,
"self": 2.344997483305633e-06
},
"TrainerController._save_models": {
"total": 0.2869902089878451,
"count": 1,
"self": 0.005619421994197182,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2813707869936479,
"count": 1,
"self": 0.2813707869936479
}
}
}
}
}
}
}