First Push

1d39dc8 verified 11 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.409071922302246,
	"min": 0.580023467540741,
	"max": 3.2957353591918945,
	"count": 5000
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 28406.890625,
	"min": 5988.8271484375,
	"max": 145126.765625,
	"count": 5000
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 69.3972602739726,
	"min": 44.054545454545455,
	"max": 999.0,
	"count": 5000
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 20264.0,
	"min": 12164.0,
	"max": 27920.0,
	"count": 5000
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1542.8781649418193,
	"min": 1184.7955637147627,
	"max": 1625.1740943820848,
	"count": 3265
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 225260.21208150563,
	"min": 2374.3217125616216,
	"max": 351378.12929364503,
	"count": 3265
	},
	"SoccerTwos.Step.mean": {
	"value": 49999902.0,
	"min": 9976.0,
	"max": 49999902.0,
	"count": 5000
	},
	"SoccerTwos.Step.sum": {
	"value": 49999902.0,
	"min": 9976.0,
	"max": 49999902.0,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.04849886894226074,
	"min": -0.14891868829727173,
	"max": 0.197387233376503,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -7.032336235046387,
	"min": -25.018339157104492,
	"max": 21.909982681274414,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.04817768931388855,
	"min": -0.14785706996917725,
	"max": 0.19496957957744598,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -6.985764980316162,
	"min": -24.83998680114746,
	"max": 21.64162254333496,
	"count": 5000
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 5000
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.14953103558770542,
	"min": -0.5992666681607565,
	"max": 0.4935333345617567,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -21.682000160217285,
	"min": -59.1327999830246,
	"max": 46.43540012836456,
	"count": 5000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.14953103558770542,
	"min": -0.5992666681607565,
	"max": 0.4935333345617567,
	"count": 5000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -21.682000160217285,
	"min": -59.1327999830246,
	"max": 46.43540012836456,
	"count": 5000
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 5000
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 5000
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.018001814029412345,
	"min": 0.009255766456529575,
	"max": 0.026368796251093347,
	"count": 2370
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.018001814029412345,
	"min": 0.009255766456529575,
	"max": 0.026368796251093347,
	"count": 2370
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.08666058033704757,
	"min": 3.279833690518948e-11,
	"max": 0.11784214849273364,
	"count": 2370
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.08666058033704757,
	"min": 3.279833690518948e-11,
	"max": 0.11784214849273364,
	"count": 2370
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.08756576379140218,
	"min": 3.7002813429073043e-11,
	"max": 0.11892481545607249,
	"count": 2370
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.08756576379140218,
	"min": 3.7002813429073043e-11,
	"max": 0.11892481545607249,
	"count": 2370
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 2370
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 2370
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000004,
	"max": 0.20000000000000007,
	"count": 2370
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000004,
	"max": 0.20000000000000007,
	"count": 2370
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 2370
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 2370
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1705906520",
	"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
	"command_line_arguments": "/root/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.1.2+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1706014077"
	},
	"total": 107556.39236933202,
	"count": 1,
	"self": 0.4893856640555896,
	"children": {
	"run_training.setup": {
	"total": 0.09685415698913857,
	"count": 1,
	"self": 0.09685415698913857
	},
	"TrainerController.start_learning": {
	"total": 107555.80612951098,
	"count": 1,
	"self": 56.057607389520854,
	"children": {
	"TrainerController._reset_env": {
	"total": 12.113171444681939,
	"count": 250,
	"self": 12.113171444681939
	},
	"TrainerController.advance": {
	"total": 107487.14703818178,
	"count": 3358224,
	"self": 53.167223570810165,
	"children": {
	"env_step": {
	"total": 47161.045734404994,
	"count": 3358224,
	"self": 37205.82435472682,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 9922.062924618716,
	"count": 3358224,
	"self": 372.13904499076307,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 9549.923879627953,
	"count": 6350498,
	"self": 9549.923879627953
	}
	}
	},
	"workers": {
	"total": 33.15845505945617,
	"count": 3358224,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 107466.25550758577,
	"count": 3358224,
	"is_parallel": true,
	"self": 77240.05800597148,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0024395579821430147,
	"count": 2,
	"is_parallel": true,
	"self": 0.0007359739975072443,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0017035839846357703,
	"count": 8,
	"is_parallel": true,
	"self": 0.0017035839846357703
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.02229533198988065,
	"count": 1,
	"is_parallel": true,
	"self": 0.0005469299503602087,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0004575889906845987,
	"count": 1,
	"is_parallel": true,
	"self": 0.0004575889906845987
	},
	"communicator.exchange": {
	"total": 0.019752520020119846,
	"count": 1,
	"is_parallel": true,
	"self": 0.019752520020119846
	},
	"steps_from_proto": {
	"total": 0.001538293028715998,
	"count": 2,
	"is_parallel": true,
	"self": 0.0003324220306240022,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0012058709980919957,
	"count": 8,
	"is_parallel": true,
	"self": 0.0012058709980919957
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 30225.796755509567,
	"count": 3358223,
	"is_parallel": true,
	"self": 1597.375346368237,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 1068.764909645426,
	"count": 3358223,
	"is_parallel": true,
	"self": 1068.764909645426
	},
	"communicator.exchange": {
	"total": 22985.073879790318,
	"count": 3358223,
	"is_parallel": true,
	"self": 22985.073879790318
	},
	"steps_from_proto": {
	"total": 4574.582619705587,
	"count": 6716446,
	"is_parallel": true,
	"self": 858.5062123311218,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 3716.076407374465,
	"count": 26865784,
	"is_parallel": true,
	"self": 3716.076407374465
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.40074610471492633,
	"count": 498,
	"is_parallel": true,
	"self": 0.07684651366434991,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.3238995910505764,
	"count": 1992,
	"is_parallel": true,
	"self": 0.3238995910505764
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 60272.934080205974,
	"count": 3358224,
	"self": 490.5562912662863,
	"children": {
	"process_trajectory": {
	"total": 9244.681141626206,
	"count": 3358224,
	"self": 9102.18898997939,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 142.49215164681664,
	"count": 100,
	"self": 142.49215164681664
	}
	}
	},
	"_update_policy": {
	"total": 50537.69664731348,
	"count": 2370,
	"self": 5545.043199257401,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 44992.65344805608,
	"count": 71109,
	"self": 44992.65344805608
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 8.729984983801842e-07,
	"count": 1,
	"self": 8.729984983801842e-07
	},
	"TrainerController._save_models": {
	"total": 0.48831162200076506,
	"count": 1,
	"self": 0.010910457989666611,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.47740116401109844,
	"count": 1,
	"self": 0.47740116401109844
	}
	}
	}
	}
	}
	}
	}