{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.5035665035247803,
"min": 1.470587968826294,
"max": 3.295746326446533,
"count": 1770
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 28242.9921875,
"min": 27409.775390625,
"max": 120267.0,
"count": 1770
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 60.34567901234568,
"min": 39.3739837398374,
"max": 999.0,
"count": 1770
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19552.0,
"min": 14212.0,
"max": 28212.0,
"count": 1770
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1544.5104403214739,
"min": 1180.9941894213423,
"max": 1603.3526931779911,
"count": 1761
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 250210.69133207877,
"min": 2363.2477491938266,
"max": 392660.82324914914,
"count": 1761
},
"SoccerTwos.Step.mean": {
"value": 17699978.0,
"min": 9122.0,
"max": 17699978.0,
"count": 1770
},
"SoccerTwos.Step.sum": {
"value": 17699978.0,
"min": 9122.0,
"max": 17699978.0,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.028001178056001663,
"min": -0.1374976485967636,
"max": 0.2069934755563736,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.536190986633301,
"min": -28.18701934814453,
"max": 27.792827606201172,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.023671336472034454,
"min": -0.1340864896774292,
"max": 0.2030305713415146,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.834756374359131,
"min": -27.168577194213867,
"max": 27.650724411010742,
"count": 1770
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1770
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.07485432021411849,
"min": -0.6008615379150097,
"max": 0.43080000256030065,
"count": 1770
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 12.126399874687195,
"min": -63.243399918079376,
"max": 62.33080017566681,
"count": 1770
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.07485432021411849,
"min": -0.6008615379150097,
"max": 0.43080000256030065,
"count": 1770
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 12.126399874687195,
"min": -63.243399918079376,
"max": 62.33080017566681,
"count": 1770
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1770
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1770
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.013383328518830239,
"min": 0.010278040572908745,
"max": 0.02610366263737281,
"count": 857
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.013383328518830239,
"min": 0.010278040572908745,
"max": 0.02610366263737281,
"count": 857
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1036595510939757,
"min": 8.419977239100262e-05,
"max": 0.12884489769736926,
"count": 857
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1036595510939757,
"min": 8.419977239100262e-05,
"max": 0.12884489769736926,
"count": 857
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10672147423028946,
"min": 8.521313526822875e-05,
"max": 0.13119359935323396,
"count": 857
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10672147423028946,
"min": 8.521313526822875e-05,
"max": 0.13119359935323396,
"count": 857
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 857
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 857
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 857
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 857
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 857
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 857
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1718514434",
"python_version": "3.10.14 (main, Jun 14 2024, 18:23:26) [GCC 11.4.0]",
"command_line_arguments": "/home/ivan/Code/rl/hf/unit7/u7/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos50M --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1718538484"
},
"total": 24049.879396452976,
"count": 1,
"self": 0.0053754179971292615,
"children": {
"run_training.setup": {
"total": 0.011126903991680592,
"count": 1,
"self": 0.011126903991680592
},
"TrainerController.start_learning": {
"total": 24049.862894130987,
"count": 1,
"self": 19.95134469534969,
"children": {
"TrainerController._reset_env": {
"total": 2.48158151388634,
"count": 89,
"self": 2.48158151388634
},
"TrainerController.advance": {
"total": 24027.216803529765,
"count": 1224896,
"self": 20.33166449604323,
"children": {
"env_step": {
"total": 17344.032462770992,
"count": 1224896,
"self": 13189.24239976547,
"children": {
"SubprocessEnvManager._take_step": {
"total": 4141.220234239125,
"count": 1224896,
"self": 104.04888988181483,
"children": {
"TorchPolicy.evaluate": {
"total": 4037.17134435731,
"count": 2225464,
"self": 4037.17134435731
}
}
},
"workers": {
"total": 13.569828766398132,
"count": 1224895,
"self": 0.0,
"children": {
"worker_root": {
"total": 24019.578487852705,
"count": 1224895,
"is_parallel": true,
"self": 13159.66805658117,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00173658796120435,
"count": 2,
"is_parallel": true,
"self": 0.00040872988756746054,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013278580736368895,
"count": 8,
"is_parallel": true,
"self": 0.0013278580736368895
}
}
},
"UnityEnvironment.step": {
"total": 0.018732728029135615,
"count": 1,
"is_parallel": true,
"self": 0.0003881650627590716,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000311177980620414,
"count": 1,
"is_parallel": true,
"self": 0.000311177980620414
},
"communicator.exchange": {
"total": 0.01675490802153945,
"count": 1,
"is_parallel": true,
"self": 0.01675490802153945
},
"steps_from_proto": {
"total": 0.0012784769642166793,
"count": 2,
"is_parallel": true,
"self": 0.0002583829336799681,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010200940305367112,
"count": 8,
"is_parallel": true,
"self": 0.0010200940305367112
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 10859.790246265533,
"count": 1224894,
"is_parallel": true,
"self": 564.7082912233891,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 331.67259040189674,
"count": 1224894,
"is_parallel": true,
"self": 331.67259040189674
},
"communicator.exchange": {
"total": 8369.044318542641,
"count": 1224894,
"is_parallel": true,
"self": 8369.044318542641
},
"steps_from_proto": {
"total": 1594.365046097606,
"count": 2449788,
"is_parallel": true,
"self": 302.4276817381615,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1291.9373643594445,
"count": 9799152,
"is_parallel": true,
"self": 1291.9373643594445
}
}
}
}
},
"steps_from_proto": {
"total": 0.12018500600242987,
"count": 176,
"is_parallel": true,
"self": 0.022353325912263244,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.09783168009016663,
"count": 704,
"is_parallel": true,
"self": 0.09783168009016663
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 6662.85267626273,
"count": 1224895,
"self": 152.00744313228643,
"children": {
"process_trajectory": {
"total": 2579.932920970081,
"count": 1224895,
"self": 2573.186581147311,
"children": {
"RLTrainer._checkpoint": {
"total": 6.746339822770096,
"count": 35,
"self": 6.746339822770096
}
}
},
"_update_policy": {
"total": 3930.9123121603625,
"count": 857,
"self": 1843.0431646477082,
"children": {
"TorchPOCAOptimizer.update": {
"total": 2087.8691475126543,
"count": 25710,
"self": 2087.8691475126543
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.069902494549751e-07,
"count": 1,
"self": 7.069902494549751e-07
},
"TrainerController._save_models": {
"total": 0.2131636849953793,
"count": 1,
"self": 0.000949086039327085,
"children": {
"RLTrainer._checkpoint": {
"total": 0.21221459895605221,
"count": 1,
"self": 0.21221459895605221
}
}
}
}
}
}
}