{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.3972941637039185,
"min": 1.285732626914978,
"max": 3.2957520484924316,
"count": 5000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 27319.89453125,
"min": 22465.439453125,
"max": 105464.0625,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 86.96428571428571,
"min": 41.85470085470085,
"max": 999.0,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19480.0,
"min": 12948.0,
"max": 30384.0,
"count": 5000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1656.5532181521019,
"min": 1192.0640787340647,
"max": 1761.2894049839942,
"count": 4984
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 185533.96043303542,
"min": 2387.000884273693,
"max": 390578.5637410532,
"count": 4984
},
"SoccerTwos.Step.mean": {
"value": 49999914.0,
"min": 9208.0,
"max": 49999914.0,
"count": 5000
},
"SoccerTwos.Step.sum": {
"value": 49999914.0,
"min": 9208.0,
"max": 49999914.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.03263528645038605,
"min": -0.12490427494049072,
"max": 0.1818830519914627,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -3.6877875328063965,
"min": -22.885658264160156,
"max": 35.649078369140625,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03411535918712616,
"min": -0.12752260267734528,
"max": 0.1826234608888626,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.8550355434417725,
"min": -22.78534507751465,
"max": 35.79419708251953,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.05500884815654923,
"min": -0.828371429017612,
"max": 0.5015090974894437,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -6.2159998416900635,
"min": -71.7260000705719,
"max": 77.86560010910034,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.05500884815654923,
"min": -0.828371429017612,
"max": 0.5015090974894437,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -6.2159998416900635,
"min": -71.7260000705719,
"max": 77.86560010910034,
"count": 5000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.02252829516849791,
"min": 0.010176922385532331,
"max": 0.02608171648074252,
"count": 2424
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.02252829516849791,
"min": 0.010176922385532331,
"max": 0.02608171648074252,
"count": 2424
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.0808956374724706,
"min": 4.394343166798838e-06,
"max": 0.1212976984679699,
"count": 2424
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.0808956374724706,
"min": 4.394343166798838e-06,
"max": 0.1212976984679699,
"count": 2424
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.08190159449974696,
"min": 4.7147190495403874e-06,
"max": 0.12247925847768784,
"count": 2424
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.08190159449974696,
"min": 4.7147190495403874e-06,
"max": 0.12247925847768784,
"count": 2424
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2424
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2424
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 2424
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 2424
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2424
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2424
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1708532550",
"python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
"command_line_arguments": "/data/home/adityacv/anaconda3/envs/mla/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1708602499"
},
"total": 69949.030304702,
"count": 1,
"self": 0.4681981570320204,
"children": {
"run_training.setup": {
"total": 0.01178661291487515,
"count": 1,
"self": 0.01178661291487515
},
"TrainerController.start_learning": {
"total": 69948.55031993205,
"count": 1,
"self": 31.1824635991361,
"children": {
"TrainerController._reset_env": {
"total": 4.883301006979309,
"count": 250,
"self": 4.883301006979309
},
"TrainerController.advance": {
"total": 69912.13297457201,
"count": 3431941,
"self": 34.62998353119474,
"children": {
"env_step": {
"total": 56880.90749909496,
"count": 3431941,
"self": 38933.40851271374,
"children": {
"SubprocessEnvManager._take_step": {
"total": 17928.185198108084,
"count": 3431941,
"self": 240.953698049183,
"children": {
"TorchPolicy.evaluate": {
"total": 17687.2315000589,
"count": 6279836,
"self": 17687.2315000589
}
}
},
"workers": {
"total": 19.313788273138925,
"count": 3431941,
"self": 0.0,
"children": {
"worker_root": {
"total": 69895.10759405745,
"count": 3431941,
"is_parallel": true,
"self": 35278.502755678375,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002374101895838976,
"count": 2,
"is_parallel": true,
"self": 0.0005878220545127988,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017862798413261771,
"count": 8,
"is_parallel": true,
"self": 0.0017862798413261771
}
}
},
"UnityEnvironment.step": {
"total": 0.015894436975941062,
"count": 1,
"is_parallel": true,
"self": 0.0004038279876112938,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00033038004767149687,
"count": 1,
"is_parallel": true,
"self": 0.00033038004767149687
},
"communicator.exchange": {
"total": 0.013942104997113347,
"count": 1,
"is_parallel": true,
"self": 0.013942104997113347
},
"steps_from_proto": {
"total": 0.0012181239435449243,
"count": 2,
"is_parallel": true,
"self": 0.00023312412668019533,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.000984999816864729,
"count": 8,
"is_parallel": true,
"self": 0.000984999816864729
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 34616.06519816734,
"count": 3431940,
"is_parallel": true,
"self": 1272.0759775217157,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 818.3795400334056,
"count": 3431940,
"is_parallel": true,
"self": 818.3795400334056
},
"communicator.exchange": {
"total": 28709.032677426352,
"count": 3431940,
"is_parallel": true,
"self": 28709.032677426352
},
"steps_from_proto": {
"total": 3816.57700318587,
"count": 6863880,
"is_parallel": true,
"self": 683.8799885136541,
"children": {
"_process_rank_one_or_two_observation": {
"total": 3132.697014672216,
"count": 27455520,
"is_parallel": true,
"self": 3132.697014672216
}
}
}
}
},
"steps_from_proto": {
"total": 0.5396402117330581,
"count": 498,
"is_parallel": true,
"self": 0.0967952188802883,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.4428449928527698,
"count": 1992,
"is_parallel": true,
"self": 0.4428449928527698
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 12996.59549194586,
"count": 3431941,
"self": 278.6270368200494,
"children": {
"process_trajectory": {
"total": 6458.452643502853,
"count": 3431941,
"self": 6427.417760323384,
"children": {
"RLTrainer._checkpoint": {
"total": 31.034883179469034,
"count": 100,
"self": 31.034883179469034
}
}
},
"_update_policy": {
"total": 6259.515811622958,
"count": 2424,
"self": 3791.8440035417443,
"children": {
"TorchPOCAOptimizer.update": {
"total": 2467.6718080812134,
"count": 72720,
"self": 2467.6718080812134
}
}
}
}
}
}
},
"trainer_threads": {
"total": 6.019836291670799e-07,
"count": 1,
"self": 6.019836291670799e-07
},
"TrainerController._save_models": {
"total": 0.3515801519388333,
"count": 1,
"self": 0.001964784925803542,
"children": {
"RLTrainer._checkpoint": {
"total": 0.34961536701302975,
"count": 1,
"self": 0.34961536701302975
}
}
}
}
}
}
}