{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.5035665035247803,
"min": 1.470587968826294,
"max": 3.295746326446533,
"count": 1770
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 28242.9921875,
"min": 27409.775390625,
"max": 120267.0,
"count": 1770
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 60.34567901234568,
"min": 39.3739837398374,
"max": 999.0,
"count": 1770
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19552.0,
"min": 14212.0,
"max": 28212.0,
"count": 1770
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1544.5104403214739,
"min": 1180.9941894213423,
"max": 1603.3526931779911,
"count": 1761
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 250210.69133207877,
"min": 2363.2477491938266,
"max": 392660.82324914914,
"count": 1761
},
"SoccerTwos.Step.mean": {
"value": 17699978.0,
"min": 9122.0,
"max": 17699978.0,
"count": 1770
},
"SoccerTwos.Step.sum": {
"value": 17699978.0,
"min": 9122.0,
"max": 17699978.0,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.028001178056001663,
"min": -0.1374976485967636,
"max": 0.2069934755563736,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.536190986633301,
"min": -28.18701934814453,
"max": 27.792827606201172,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.023671336472034454,
"min": -0.1340864896774292,
"max": 0.2030305713415146,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.834756374359131,
"min": -27.168577194213867,
"max": 27.650724411010742,
"count": 1770
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1770
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.07485432021411849,
"min": -0.6008615379150097,
"max": 0.43080000256030065,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 12.126399874687195,
"min": -63.243399918079376,
"max": 62.33080017566681,
"count": 1770
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.07485432021411849,
"min": -0.6008615379150097,
"max": 0.43080000256030065,
"count": 1770
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 12.126399874687195,
"min": -63.243399918079376,
"max": 62.33080017566681,
"count": 1770
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1770
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1770
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.013383328518830239,
"min": 0.010278040572908745,
"max": 0.02610366263737281,
"count": 857
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.013383328518830239,
"min": 0.010278040572908745,
"max": 0.02610366263737281,
"count": 857
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1036595510939757,
"min": 8.419977239100262e-05,
"max": 0.12884489769736926,
"count": 857
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1036595510939757,
"min": 8.419977239100262e-05,
"max": 0.12884489769736926,
"count": 857
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10672147423028946,
"min": 8.521313526822875e-05,
"max": 0.13119359935323396,
"count": 857
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10672147423028946,
"min": 8.521313526822875e-05,
"max": 0.13119359935323396,
"count": 857
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 857
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 857
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 857
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 857
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 857
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 857
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1718514434",
"python_version": "3.10.14 (main, Jun 14 2024, 18:23:26) [GCC 11.4.0]",
"command_line_arguments": "/home/ivan/Code/rl/hf/unit7/u7/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos50M --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1718538484"
},
"total": 24049.879396452976,
"count": 1,
"self": 0.0053754179971292615,
"children": {
"run_training.setup": {
"total": 0.011126903991680592,
"count": 1,
"self": 0.011126903991680592
},
"TrainerController.start_learning": {
"total": 24049.862894130987,
"count": 1,
"self": 19.95134469534969,
"children": {
"TrainerController._reset_env": {
"total": 2.48158151388634,
"count": 89,
"self": 2.48158151388634
},
"TrainerController.advance": {
"total": 24027.216803529765,
"count": 1224896,
"self": 20.33166449604323,
"children": {
"env_step": {
"total": 17344.032462770992,
"count": 1224896,
"self": 13189.24239976547,
"children": {
"SubprocessEnvManager._take_step": {
"total": 4141.220234239125,
"count": 1224896,
"self": 104.04888988181483,
"children": {
"TorchPolicy.evaluate": {
"total": 4037.17134435731,
"count": 2225464,
"self": 4037.17134435731
}
}
},
"workers": {
"total": 13.569828766398132,
"count": 1224895,
"self": 0.0,
"children": {
"worker_root": {
"total": 24019.578487852705,
"count": 1224895,
"is_parallel": true,
"self": 13159.66805658117,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00173658796120435,
"count": 2,
"is_parallel": true,
"self": 0.00040872988756746054,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013278580736368895,
"count": 8,
"is_parallel": true,
"self": 0.0013278580736368895
}
}
},
"UnityEnvironment.step": {
"total": 0.018732728029135615,
"count": 1,
"is_parallel": true,
"self": 0.0003881650627590716,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000311177980620414,
"count": 1,
"is_parallel": true,
"self": 0.000311177980620414
},
"communicator.exchange": {
"total": 0.01675490802153945,
"count": 1,
"is_parallel": true,
"self": 0.01675490802153945
},
"steps_from_proto": {
"total": 0.0012784769642166793,
"count": 2,
"is_parallel": true,
"self": 0.0002583829336799681,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010200940305367112,
"count": 8,
"is_parallel": true,
"self": 0.0010200940305367112
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 10859.790246265533,
"count": 1224894,
"is_parallel": true,
"self": 564.7082912233891,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 331.67259040189674,
"count": 1224894,
"is_parallel": true,
"self": 331.67259040189674
},
"communicator.exchange": {
"total": 8369.044318542641,
"count": 1224894,
"is_parallel": true,
"self": 8369.044318542641
},
"steps_from_proto": {
"total": 1594.365046097606,
"count": 2449788,
"is_parallel": true,
"self": 302.4276817381615,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1291.9373643594445,
"count": 9799152,
"is_parallel": true,
"self": 1291.9373643594445
}
}
}
}
},
"steps_from_proto": {
"total": 0.12018500600242987,
"count": 176,
"is_parallel": true,
"self": 0.022353325912263244,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.09783168009016663,
"count": 704,
"is_parallel": true,
"self": 0.09783168009016663
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 6662.85267626273,
"count": 1224895,
"self": 152.00744313228643,
"children": {
"process_trajectory": {
"total": 2579.932920970081,
"count": 1224895,
"self": 2573.186581147311,
"children": {
"RLTrainer._checkpoint": {
"total": 6.746339822770096,
"count": 35,
"self": 6.746339822770096
}
}
},
"_update_policy": {
"total": 3930.9123121603625,
"count": 857,
"self": 1843.0431646477082,
"children": {
"TorchPOCAOptimizer.update": {
"total": 2087.8691475126543,
"count": 25710,
"self": 2087.8691475126543
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.069902494549751e-07,
"count": 1,
"self": 7.069902494549751e-07
},
"TrainerController._save_models": {
"total": 0.2131636849953793,
"count": 1,
"self": 0.000949086039327085,
"children": {
"RLTrainer._checkpoint": {
"total": 0.21221459895605221,
"count": 1,
"self": 0.21221459895605221
}
}
}
}
}
}
}