{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.7527378797531128,
"min": 1.731589913368225,
"max": 2.0413029193878174,
"count": 226
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 33876.91796875,
"min": 30746.474609375,
"max": 46205.22265625,
"count": 226
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 66.76388888888889,
"min": 44.77064220183486,
"max": 88.92592592592592,
"count": 226
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19228.0,
"min": 18552.0,
"max": 20876.0,
"count": 226
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1539.329570443296,
"min": 1497.5767979757456,
"max": 1580.1131310310047,
"count": 226
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 221663.45814383464,
"min": 164060.20435254605,
"max": 341203.8260200822,
"count": 226
},
"SoccerTwos.Step.mean": {
"value": 7259984.0,
"min": 5009958.0,
"max": 7259984.0,
"count": 226
},
"SoccerTwos.Step.sum": {
"value": 7259984.0,
"min": 5009958.0,
"max": 7259984.0,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.009983843192458153,
"min": -0.09836140275001526,
"max": 0.08452701568603516,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -1.4476572275161743,
"min": -15.627198219299316,
"max": 13.186214447021484,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.01340704970061779,
"min": -0.09892755001783371,
"max": 0.07970672845840454,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -1.9440221786499023,
"min": -16.946834564208984,
"max": 13.001588821411133,
"count": 226
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 226
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.07598344745307133,
"min": -0.2877124181759903,
"max": 0.24915163345586241,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 11.017599880695343,
"min": -49.74940013885498,
"max": 38.56599986553192,
"count": 226
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.07598344745307133,
"min": -0.2877124181759903,
"max": 0.24915163345586241,
"count": 226
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 11.017599880695343,
"min": -49.74940013885498,
"max": 38.56599986553192,
"count": 226
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 226
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 226
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.021885531744919716,
"min": 0.012661570597750445,
"max": 0.022789206355810165,
"count": 109
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.021885531744919716,
"min": 0.012661570597750445,
"max": 0.022789206355810165,
"count": 109
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09653946161270141,
"min": 0.07622768034537633,
"max": 0.11267870639761289,
"count": 109
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09653946161270141,
"min": 0.07622768034537633,
"max": 0.11267870639761289,
"count": 109
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09882887626687685,
"min": 0.07700358976920446,
"max": 0.11486682246128718,
"count": 109
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09882887626687685,
"min": 0.07700358976920446,
"max": 0.11486682246128718,
"count": 109
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 109
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 109
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 109
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 109
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 109
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 109
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1734636362",
"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./results/SoccerTwosNew/configuration.yaml --env=train-soccer/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwosNew --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1734650253"
},
"total": 13891.128242882998,
"count": 1,
"self": 0.675532804998511,
"children": {
"run_training.setup": {
"total": 0.12229162600124255,
"count": 1,
"self": 0.12229162600124255
},
"TrainerController.start_learning": {
"total": 13890.330418451998,
"count": 1,
"self": 6.579956332603615,
"children": {
"TrainerController._reset_env": {
"total": 3.651269563000824,
"count": 13,
"self": 3.651269563000824
},
"TrainerController.advance": {
"total": 13880.099190477398,
"count": 156038,
"self": 6.859771244569856,
"children": {
"env_step": {
"total": 5160.954000388654,
"count": 156038,
"self": 4135.854043012016,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1021.3599805107733,
"count": 156038,
"self": 37.73679299061769,
"children": {
"TorchPolicy.evaluate": {
"total": 983.6231875201556,
"count": 284144,
"self": 983.6231875201556
}
}
},
"workers": {
"total": 3.7399768658651737,
"count": 156038,
"self": 0.0,
"children": {
"worker_root": {
"total": 13864.58591102602,
"count": 156038,
"is_parallel": true,
"self": 10430.003311618711,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.004784036002092762,
"count": 2,
"is_parallel": true,
"self": 0.0013643790007336065,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0034196570013591554,
"count": 8,
"is_parallel": true,
"self": 0.0034196570013591554
}
}
},
"UnityEnvironment.step": {
"total": 0.049160418999235844,
"count": 1,
"is_parallel": true,
"self": 0.0014789649940212257,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0009997170018323231,
"count": 1,
"is_parallel": true,
"self": 0.0009997170018323231
},
"communicator.exchange": {
"total": 0.04229447900070227,
"count": 1,
"is_parallel": true,
"self": 0.04229447900070227
},
"steps_from_proto": {
"total": 0.004387258002680028,
"count": 2,
"is_parallel": true,
"self": 0.0007887780011515133,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0035984800015285145,
"count": 8,
"is_parallel": true,
"self": 0.0035984800015285145
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.03430388499691617,
"count": 24,
"is_parallel": true,
"self": 0.006769309999071993,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.027534574997844175,
"count": 96,
"is_parallel": true,
"self": 0.027534574997844175
}
}
},
"UnityEnvironment.step": {
"total": 3434.5482955223124,
"count": 156037,
"is_parallel": true,
"self": 209.41908011729902,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 134.4979276757731,
"count": 156037,
"is_parallel": true,
"self": 134.4979276757731
},
"communicator.exchange": {
"total": 2446.2236635178488,
"count": 156037,
"is_parallel": true,
"self": 2446.2236635178488
},
"steps_from_proto": {
"total": 644.4076242113915,
"count": 312074,
"is_parallel": true,
"self": 113.29622146241672,
"children": {
"_process_rank_one_or_two_observation": {
"total": 531.1114027489748,
"count": 1248296,
"is_parallel": true,
"self": 531.1114027489748
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 8712.285418844174,
"count": 156038,
"self": 46.07346530166615,
"children": {
"process_trajectory": {
"total": 1352.0423007915306,
"count": 156038,
"self": 1351.0319964295304,
"children": {
"RLTrainer._checkpoint": {
"total": 1.01030436200017,
"count": 4,
"self": 1.01030436200017
}
}
},
"_update_policy": {
"total": 7314.169652750978,
"count": 110,
"self": 451.66970887399293,
"children": {
"TorchPOCAOptimizer.update": {
"total": 6862.499943876985,
"count": 3273,
"self": 6862.499943876985
}
}
}
}
}
}
},
"trainer_threads": {
"total": 2.078995748888701e-06,
"count": 1,
"self": 2.078995748888701e-06
}
}
}
}
}