{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.755637288093567,
"min": 1.7151814699172974,
"max": 3.295724630355835,
"count": 499
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 36910.51953125,
"min": 13612.6572265625,
"max": 172905.75,
"count": 499
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 63.6125,
"min": 39.645161290322584,
"max": 999.0,
"count": 499
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 20356.0,
"min": 10692.0,
"max": 31500.0,
"count": 499
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1540.6404213531655,
"min": 1197.4957713356807,
"max": 1576.4143309371482,
"count": 478
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 246502.4674165065,
"min": 2399.421175774324,
"max": 363079.2013600959,
"count": 478
},
"SoccerTwos.Step.mean": {
"value": 4999972.0,
"min": 9254.0,
"max": 4999972.0,
"count": 500
},
"SoccerTwos.Step.sum": {
"value": 4999972.0,
"min": 9254.0,
"max": 4999972.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.034482039511203766,
"min": -0.10746785998344421,
"max": 0.16136549413204193,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -5.517126083374023,
"min": -22.675718307495117,
"max": 27.432022094726562,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.034504301846027374,
"min": -0.11580751091241837,
"max": 0.1590351015329361,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.520688533782959,
"min": -24.43538475036621,
"max": 26.952106475830078,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.08047500103712082,
"min": -0.5921090895479376,
"max": 0.4576799988746643,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -12.876000165939331,
"min": -66.35360014438629,
"max": 60.00919944047928,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.08047500103712082,
"min": -0.5921090895479376,
"max": 0.4576799988746643,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -12.876000165939331,
"min": -66.35360014438629,
"max": 60.00919944047928,
"count": 500
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.015034021192695946,
"min": 0.010941779592152064,
"max": 0.024873137117053072,
"count": 239
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.015034021192695946,
"min": 0.010941779592152064,
"max": 0.024873137117053072,
"count": 239
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.11739198019107183,
"min": 3.1021232795562052e-06,
"max": 0.1251884085436662,
"count": 239
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.11739198019107183,
"min": 3.1021232795562052e-06,
"max": 0.1251884085436662,
"count": 239
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.11775108724832535,
"min": 3.304618659664508e-06,
"max": 0.1258082126577695,
"count": 239
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.11775108724832535,
"min": 3.304618659664508e-06,
"max": 0.1258082126577695,
"count": 239
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 1.0132997973599917e-06,
"min": 1.0132997973599917e-06,
"max": 0.0004968980006203998,
"count": 239
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 1.0132997973599917e-06,
"min": 1.0132997973599917e-06,
"max": 0.0004968980006203998,
"count": 239
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10020264000000001,
"min": 0.10020264000000001,
"max": 0.19937960000000005,
"count": 239
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10020264000000001,
"min": 0.10020264000000001,
"max": 0.19937960000000005,
"count": 239
},
"SoccerTwos.Policy.Beta.mean": {
"value": 2.0111735999999927e-05,
"min": 2.0111735999999927e-05,
"max": 0.004969042040000001,
"count": 239
},
"SoccerTwos.Policy.Beta.sum": {
"value": 2.0111735999999927e-05,
"min": 2.0111735999999927e-05,
"max": 0.004969042040000001,
"count": 239
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1703848764",
"python_version": "3.10.12 (main, Jul 5 2023, 15:02:25) [Clang 14.0.6 ]",
"command_line_arguments": "/opt/homebrew/Caskroom/miniconda/base/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.app --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.2",
"numpy_version": "1.23.5",
"end_time_seconds": "1703853889"
},
"total": 5124.430119083001,
"count": 1,
"self": 0.19001916600973345,
"children": {
"run_training.setup": {
"total": 0.012270416977116838,
"count": 1,
"self": 0.012270416977116838
},
"TrainerController.start_learning": {
"total": 5124.227829500014,
"count": 1,
"self": 2.531355981802335,
"children": {
"TrainerController._reset_env": {
"total": 1.5887895020714495,
"count": 20,
"self": 1.5887895020714495
},
"TrainerController.advance": {
"total": 5119.955040642148,
"count": 341985,
"self": 2.185286336200079,
"children": {
"env_step": {
"total": 2303.891035468987,
"count": 341985,
"self": 1941.7491068352829,
"children": {
"SubprocessEnvManager._take_step": {
"total": 360.6655651805049,
"count": 341985,
"self": 11.17105897743022,
"children": {
"TorchPolicy.evaluate": {
"total": 349.4945062030747,
"count": 631296,
"self": 349.4945062030747
}
}
},
"workers": {
"total": 1.4763634531991556,
"count": 341985,
"self": 0.0,
"children": {
"worker_root": {
"total": 5119.23122701954,
"count": 341985,
"is_parallel": true,
"self": 3492.6540978672565,
"children": {
"steps_from_proto": {
"total": 0.013060167053481564,
"count": 40,
"is_parallel": true,
"self": 0.0021915972174610943,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.01086856983602047,
"count": 160,
"is_parallel": true,
"self": 0.01086856983602047
}
}
},
"UnityEnvironment.step": {
"total": 1626.5640689852298,
"count": 341985,
"is_parallel": true,
"self": 78.99318619386759,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 37.23996753204847,
"count": 341985,
"is_parallel": true,
"self": 37.23996753204847
},
"communicator.exchange": {
"total": 1310.1512493426271,
"count": 341985,
"is_parallel": true,
"self": 1310.1512493426271
},
"steps_from_proto": {
"total": 200.17966591668664,
"count": 683970,
"is_parallel": true,
"self": 30.950994182756403,
"children": {
"_process_rank_one_or_two_observation": {
"total": 169.22867173393024,
"count": 2735880,
"is_parallel": true,
"self": 169.22867173393024
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2813.8787188369606,
"count": 341985,
"self": 21.16630858287681,
"children": {
"process_trajectory": {
"total": 556.1312195862702,
"count": 341985,
"self": 554.5936779602489,
"children": {
"RLTrainer._checkpoint": {
"total": 1.537541626021266,
"count": 10,
"self": 1.537541626021266
}
}
},
"_update_policy": {
"total": 2236.5811906678136,
"count": 239,
"self": 242.41541127837263,
"children": {
"TorchPOCAOptimizer.update": {
"total": 1994.165779389441,
"count": 7185,
"self": 1994.165779389441
}
}
}
}
}
}
},
"trainer_threads": {
"total": 3.3300602808594704e-07,
"count": 1,
"self": 3.3300602808594704e-07
},
"TrainerController._save_models": {
"total": 0.15264304098673165,
"count": 1,
"self": 0.000656957970932126,
"children": {
"RLTrainer._checkpoint": {
"total": 0.15198608301579952,
"count": 1,
"self": 0.15198608301579952
}
}
}
}
}
}
}