{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.2583315670490265,
"min": 0.21724596619606018,
"max": 1.4476444721221924,
"count": 54
},
"Pyramids.Policy.Entropy.sum": {
"value": 7787.146484375,
"min": 6513.90283203125,
"max": 43915.7421875,
"count": 54
},
"Pyramids.Step.mean": {
"value": 1619931.0,
"min": 29971.0,
"max": 1619931.0,
"count": 54
},
"Pyramids.Step.sum": {
"value": 1619931.0,
"min": 29971.0,
"max": 1619931.0,
"count": 54
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7295573949813843,
"min": -0.0806223452091217,
"max": 0.7685034871101379,
"count": 54
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 211.57164001464844,
"min": -19.268739700317383,
"max": 224.777587890625,
"count": 54
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.0080729890614748,
"min": -0.0080729890614748,
"max": 0.35350826382637024,
"count": 54
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -2.3411667346954346,
"min": -2.3411667346954346,
"max": 84.84197998046875,
"count": 54
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.07137363389434738,
"min": 0.06481730613935117,
"max": 0.07400298154106948,
"count": 54
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9992308745208633,
"min": 0.5494496235099618,
"max": 1.0635391915663301,
"count": 54
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.016405179642215214,
"min": 0.0005872933246925236,
"max": 0.01656315394987663,
"count": 54
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.22967251499101302,
"min": 0.007634813221002808,
"max": 0.24844730924814942,
"count": 54
},
"Pyramids.Policy.LearningRate.mean": {
"value": 0.0001394913035029167,
"min": 0.0001394913035029167,
"max": 0.00029828408807197084,
"count": 54
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0019528782490408337,
"min": 0.0019528782490408337,
"max": 0.003969298376900566,
"count": 54
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.14649708333333336,
"min": 0.14649708333333336,
"max": 0.1994280291666667,
"count": 54
},
"Pyramids.Policy.Epsilon.sum": {
"value": 2.050959166666667,
"min": 1.5954242333333335,
"max": 2.7674801333333336,
"count": 54
},
"Pyramids.Policy.Beta.mean": {
"value": 0.004655058625,
"min": 0.004655058625,
"max": 0.00994286011375,
"count": 54
},
"Pyramids.Policy.Beta.sum": {
"value": 0.06517082075,
"min": 0.06517082075,
"max": 0.13231763339000002,
"count": 54
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009542092680931091,
"min": 0.009392058476805687,
"max": 0.44797787070274353,
"count": 54
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.13358929753303528,
"min": 0.13148881494998932,
"max": 3.5838229656219482,
"count": 54
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 246.64406779661016,
"min": 246.64406779661016,
"max": 999.0,
"count": 54
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29104.0,
"min": 16386.0,
"max": 32627.0,
"count": 54
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.720013428510738,
"min": -0.9999750521965325,
"max": 1.74028300522071,
"count": 54
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 204.68159799277782,
"min": -31.99920167028904,
"max": 206.18079817295074,
"count": 54
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.720013428510738,
"min": -0.9999750521965325,
"max": 1.74028300522071,
"count": 54
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 204.68159799277782,
"min": -31.99920167028904,
"max": 206.18079817295074,
"count": 54
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.02405466943478017,
"min": 0.02405466943478017,
"max": 8.963894915054826,
"count": 54
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.8625056627388403,
"min": 2.7509678110363893,
"max": 152.38621355593204,
"count": 54
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 54
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 54
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1707224872",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1707229061"
},
"total": 4188.915410618,
"count": 1,
"self": 0.38642960200013476,
"children": {
"run_training.setup": {
"total": 0.05230056000004879,
"count": 1,
"self": 0.05230056000004879
},
"TrainerController.start_learning": {
"total": 4188.476680456,
"count": 1,
"self": 3.050391059863614,
"children": {
"TrainerController._reset_env": {
"total": 2.4704366639998625,
"count": 1,
"self": 2.4704366639998625
},
"TrainerController.advance": {
"total": 4182.815778159137,
"count": 104963,
"self": 3.3258992652181405,
"children": {
"env_step": {
"total": 3095.8190125259234,
"count": 104963,
"self": 2833.5798398598845,
"children": {
"SubprocessEnvManager._take_step": {
"total": 260.2718381469265,
"count": 104963,
"self": 9.338487544950112,
"children": {
"TorchPolicy.evaluate": {
"total": 250.93335060197637,
"count": 101703,
"self": 250.93335060197637
}
}
},
"workers": {
"total": 1.967334519112228,
"count": 104962,
"self": 0.0,
"children": {
"worker_root": {
"total": 4178.421330616076,
"count": 104962,
"is_parallel": true,
"self": 1576.2555932979972,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0019868429999405635,
"count": 1,
"is_parallel": true,
"self": 0.0006634890003169858,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013233539996235777,
"count": 8,
"is_parallel": true,
"self": 0.0013233539996235777
}
}
},
"UnityEnvironment.step": {
"total": 0.05427606300008847,
"count": 1,
"is_parallel": true,
"self": 0.0006261709997943399,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000490842000090197,
"count": 1,
"is_parallel": true,
"self": 0.000490842000090197
},
"communicator.exchange": {
"total": 0.051489238000158366,
"count": 1,
"is_parallel": true,
"self": 0.051489238000158366
},
"steps_from_proto": {
"total": 0.0016698120000455674,
"count": 1,
"is_parallel": true,
"self": 0.00035045399999944493,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013193580000461225,
"count": 8,
"is_parallel": true,
"self": 0.0013193580000461225
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2602.165737318079,
"count": 104961,
"is_parallel": true,
"self": 65.08685064810425,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 43.420788156953904,
"count": 104961,
"is_parallel": true,
"self": 43.420788156953904
},
"communicator.exchange": {
"total": 2315.9833638811124,
"count": 104961,
"is_parallel": true,
"self": 2315.9833638811124
},
"steps_from_proto": {
"total": 177.67473463190913,
"count": 104961,
"is_parallel": true,
"self": 37.319316944888214,
"children": {
"_process_rank_one_or_two_observation": {
"total": 140.35541768702092,
"count": 839688,
"is_parallel": true,
"self": 140.35541768702092
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1083.6708663679951,
"count": 104962,
"self": 6.024583469179106,
"children": {
"process_trajectory": {
"total": 221.3339910068123,
"count": 104962,
"self": 221.04260457281202,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2913864340002874,
"count": 3,
"self": 0.2913864340002874
}
}
},
"_update_policy": {
"total": 856.3122918920037,
"count": 750,
"self": 498.8926279559762,
"children": {
"TorchPPOOptimizer.update": {
"total": 357.4196639360275,
"count": 37050,
"self": 357.4196639360275
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.5849991541472264e-06,
"count": 1,
"self": 1.5849991541472264e-06
},
"TrainerController._save_models": {
"total": 0.14007298800061108,
"count": 1,
"self": 0.0022928650014364393,
"children": {
"RLTrainer._checkpoint": {
"total": 0.13778012299917464,
"count": 1,
"self": 0.13778012299917464
}
}
}
}
}
}
}