poca-SoccerTwos / run_logs / timers.json
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.11602783203125,
"min": 1.1030526161193848,
"max": 1.4285537004470825,
"count": 500
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 22320.556640625,
"min": 21029.970703125,
"max": 28662.501953125,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 45.905660377358494,
"min": 35.28,
"max": 55.38202247191011,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19464.0,
"min": 14112.0,
"max": 21312.0,
"count": 500
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1672.5986965358836,
"min": 1594.9761687604023,
"max": 1684.8696129621046,
"count": 500
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 354590.92366560735,
"min": 291606.7260394614,
"max": 450906.7595913422,
"count": 500
},
"SoccerTwos.Step.mean": {
"value": 14999982.0,
"min": 10009972.0,
"max": 14999982.0,
"count": 500
},
"SoccerTwos.Step.sum": {
"value": 14999982.0,
"min": 10009972.0,
"max": 14999982.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.033846165984869,
"min": -0.10714015364646912,
"max": 0.09757562726736069,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -13.876928329467773,
"min": -45.2131462097168,
"max": 41.95751953125,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03533671796321869,
"min": -0.11142526566982269,
"max": 0.10013997554779053,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -14.488054275512695,
"min": -47.021461486816406,
"max": 43.06018829345703,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.07970754661650029,
"min": -0.26102524301380786,
"max": 0.3510517215934293,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -16.89799988269806,
"min": -55.66319966316223,
"max": 81.4439994096756,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.07970754661650029,
"min": -0.26102524301380786,
"max": 0.3510517215934293,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -16.89799988269806,
"min": -55.66319966316223,
"max": 81.4439994096756,
"count": 500
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.012091718217061499,
"min": 0.009822013491066173,
"max": 0.015032402960059699,
"count": 76
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.012091718217061499,
"min": 0.009822013491066173,
"max": 0.015032402960059699,
"count": 76
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.05806648256257176,
"min": 0.05226881531998515,
"max": 0.059615218080580235,
"count": 76
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.05806648256257176,
"min": 0.05226881531998515,
"max": 0.059615218080580235,
"count": 76
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.05836009904742241,
"min": 0.0542876117862761,
"max": 0.06110596880316734,
"count": 76
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.05836009904742241,
"min": 0.0542876117862761,
"max": 0.06110596880316734,
"count": 76
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 4.0553810392494676e-07,
"min": 4.0553810392494676e-07,
"max": 0.0001428860023203168,
"count": 76
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 4.0553810392494676e-07,
"min": 4.0553810392494676e-07,
"max": 0.0001428860023203168,
"count": 76
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10005440800688417,
"min": 0.10005440800688417,
"max": 0.11917465842958205,
"count": 76
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10005440800688417,
"min": 0.10005440800688417,
"max": 0.11917465842958205,
"count": 76
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.0169934717551432e-05,
"min": 1.0169934717551432e-05,
"max": 6.988898235716646e-05,
"count": 76
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.0169934717551432e-05,
"min": 1.0169934717551432e-05,
"max": 6.988898235716646e-05,
"count": 76
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1710358573",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/stephan/.conda/envs/rl/bin/mlagents-learn ./ml-agents/config/poca/SoccerTwos.yaml --env=./ml-agents/training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos-read-deal --width=1280 --height=720 --resume",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1710372965"
},
"total": 14392.268066103992,
"count": 1,
"self": 0.3203680879960302,
"children": {
"run_training.setup": {
"total": 0.017996288996073417,
"count": 1,
"self": 0.017996288996073417
},
"TrainerController.start_learning": {
"total": 14391.929701727,
"count": 1,
"self": 7.222826109427842,
"children": {
"TrainerController._reset_env": {
"total": 4.3356985290010925,
"count": 10,
"self": 4.3356985290010925
},
"TrainerController.advance": {
"total": 14380.252779454575,
"count": 357131,
"self": 6.432589866788476,
"children": {
"env_step": {
"total": 6466.631959918537,
"count": 357131,
"self": 5498.500712261317,
"children": {
"SubprocessEnvManager._take_step": {
"total": 963.7606686544896,
"count": 357131,
"self": 29.2971219017345,
"children": {
"TorchPolicy.evaluate": {
"total": 934.4635467527551,
"count": 624880,
"self": 934.4635467527551
}
}
},
"workers": {
"total": 4.370579002730665,
"count": 357131,
"self": 0.0,
"children": {
"worker_root": {
"total": 14380.838316511697,
"count": 357131,
"is_parallel": true,
"self": 9673.599672775075,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.06619343400234357,
"count": 2,
"is_parallel": true,
"self": 0.0004880309716099873,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.06570540303073358,
"count": 8,
"is_parallel": true,
"self": 0.06570540303073358
}
}
},
"UnityEnvironment.step": {
"total": 0.027265490003628656,
"count": 1,
"is_parallel": true,
"self": 0.0006784550059819594,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004930220020469278,
"count": 1,
"is_parallel": true,
"self": 0.0004930220020469278
},
"communicator.exchange": {
"total": 0.024494213997968473,
"count": 1,
"is_parallel": true,
"self": 0.024494213997968473
},
"steps_from_proto": {
"total": 0.0015997989976312965,
"count": 2,
"is_parallel": true,
"self": 0.00032821799686644226,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012715810007648543,
"count": 8,
"is_parallel": true,
"self": 0.0012715810007648543
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.018047201985609718,
"count": 18,
"is_parallel": true,
"self": 0.0030916809337213635,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.014955521051888354,
"count": 72,
"is_parallel": true,
"self": 0.014955521051888354
}
}
},
"UnityEnvironment.step": {
"total": 4707.220596534637,
"count": 357130,
"is_parallel": true,
"self": 257.41373912687413,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 162.74674167780904,
"count": 357130,
"is_parallel": true,
"self": 162.74674167780904
},
"communicator.exchange": {
"total": 3645.2510425291694,
"count": 357130,
"is_parallel": true,
"self": 3645.2510425291694
},
"steps_from_proto": {
"total": 641.809073200784,
"count": 714260,
"is_parallel": true,
"self": 114.93468356617086,
"children": {
"_process_rank_one_or_two_observation": {
"total": 526.8743896346132,
"count": 2857040,
"is_parallel": true,
"self": 526.8743896346132
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 7907.188229669249,
"count": 357131,
"self": 41.56422363645106,
"children": {
"process_trajectory": {
"total": 1667.4114032577781,
"count": 357131,
"self": 1666.2047581057705,
"children": {
"RLTrainer._checkpoint": {
"total": 1.2066451520076953,
"count": 10,
"self": 1.2066451520076953
}
}
},
"_update_policy": {
"total": 6198.21260277502,
"count": 76,
"self": 967.3907746167824,
"children": {
"TorchPOCAOptimizer.update": {
"total": 5230.821828158238,
"count": 6080,
"self": 5230.821828158238
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.199996616691351e-07,
"count": 1,
"self": 7.199996616691351e-07
},
"TrainerController._save_models": {
"total": 0.11839691399654839,
"count": 1,
"self": 0.0024417589884251356,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11595515500812326,
"count": 1,
"self": 0.11595515500812326
}
}
}
}
}
}
}
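
Note: the snippet below is not part of timers.json itself; it is a minimal sketch of how the "gauges" section above could be summarized with Python's standard json module, assuming the file is saved locally at run_logs/timers.json (path is illustrative).

import json

# Load the ML-Agents run log (path is an assumption for illustration).
with open("run_logs/timers.json") as f:
    timers = json.load(f)

# Print each gauge with its final value, observed range, and sample count.
for name, gauge in timers["gauges"].items():
    print(f"{name}: value={gauge['value']:.4g} "
          f"(min={gauge['min']:.4g}, max={gauge['max']:.4g}, count={gauge['count']})")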