cpgrant commited on
Commit
030b35f
1 Parent(s): 4aadbc4

Upload folder using huggingface_hub

Browse files
.summary/0/events.out.tfevents.1724676673.W11-AI24 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd13eb64b80a2078b9123515eb16672467f06a08627a226f8218b36fde4d86c5
3
+ size 48930
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
15
  type: doom_health_gathering_supreme
16
  metrics:
17
  - type: mean_reward
18
- value: 4.06 +/- 0.35
19
  name: mean_reward
20
  verified: false
21
  ---
 
15
  type: doom_health_gathering_supreme
16
  metrics:
17
  - type: mean_reward
18
+ value: 4.03 +/- 0.29
19
  name: mean_reward
20
  verified: false
21
  ---
checkpoint_p0/best_000000597_2445312_reward_4.609.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7b7a6fce8486f18b01900337047eb0245f48e5dc4bb6bb5a83ac83384e2215
3
+ size 34929051
checkpoint_p0/checkpoint_000000978_4005888.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a917c6472e8f8a967e0e481690db62b2088aca10545b87611472999e85f4d483
3
- size 34929541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72ea91eb80261e4a8732e68cea18cab2ca08344ff235cc9a49d2d4b9df387f53
3
+ size 34929477
config.json CHANGED
@@ -15,8 +15,8 @@
15
  "worker_num_splits": 2,
16
  "policy_workers_per_policy": 1,
17
  "max_policy_lag": 1000,
18
- "num_workers": 4,
19
- "num_envs_per_worker": 2,
20
  "batch_size": 1024,
21
  "num_batches_per_epoch": 1,
22
  "num_epochs": 1,
@@ -65,7 +65,7 @@
65
  "summaries_use_frameskip": true,
66
  "heartbeat_interval": 20,
67
  "heartbeat_reporting_interval": 600,
68
- "train_for_env_steps": 1000000,
69
  "train_for_seconds": 10000000000,
70
  "save_every_sec": 120,
71
  "keep_checkpoints": 2,
@@ -130,12 +130,12 @@
130
  "wide_aspect_ratio": false,
131
  "eval_env_frameskip": 1,
132
  "fps": 35,
133
- "command_line": "--env=doom_health_gathering_supreme --num_workers=4 --num_envs_per_worker=2 --train_for_env_steps=1000000",
134
  "cli_args": {
135
  "env": "doom_health_gathering_supreme",
136
- "num_workers": 4,
137
- "num_envs_per_worker": 2,
138
- "train_for_env_steps": 1000000
139
  },
140
  "git_hash": "unknown",
141
  "git_repo_name": "not a git repository"
 
15
  "worker_num_splits": 2,
16
  "policy_workers_per_policy": 1,
17
  "max_policy_lag": 1000,
18
+ "num_workers": 24,
19
+ "num_envs_per_worker": 12,
20
  "batch_size": 1024,
21
  "num_batches_per_epoch": 1,
22
  "num_epochs": 1,
 
65
  "summaries_use_frameskip": true,
66
  "heartbeat_interval": 20,
67
  "heartbeat_reporting_interval": 600,
68
+ "train_for_env_steps": 4000000,
69
  "train_for_seconds": 10000000000,
70
  "save_every_sec": 120,
71
  "keep_checkpoints": 2,
 
130
  "wide_aspect_ratio": false,
131
  "eval_env_frameskip": 1,
132
  "fps": 35,
133
+ "command_line": "--env=doom_health_gathering_supreme --num_workers=24 --num_envs_per_worker=12 --train_for_env_steps=4000000",
134
  "cli_args": {
135
  "env": "doom_health_gathering_supreme",
136
+ "num_workers": 24,
137
+ "num_envs_per_worker": 12,
138
+ "train_for_env_steps": 4000000
139
  },
140
  "git_hash": "unknown",
141
  "git_repo_name": "not a git repository"
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae0bed03ba6ac93136f3761fb73fb9b4c5a0e388b416965a7a15631c51928a75
3
- size 6014462
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484a7f3aeb83bdf7d3651c52d49eb8cb31eee039a7d925965a07d08300b09ad9
3
+ size 5902261
sf_log.txt CHANGED
@@ -1,31 +1,62 @@
1
- [2024-08-26 14:41:12,704][93406] Saving configuration to /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/config.json...
2
- [2024-08-26 14:41:12,705][93406] Rollout worker 0 uses device cpu
3
- [2024-08-26 14:41:12,705][93406] Rollout worker 1 uses device cpu
4
- [2024-08-26 14:41:12,705][93406] Rollout worker 2 uses device cpu
5
- [2024-08-26 14:41:12,705][93406] Rollout worker 3 uses device cpu
6
- [2024-08-26 14:41:12,755][93406] Using GPUs [0] for process 0 (actually maps to GPUs [0])
7
- [2024-08-26 14:41:12,756][93406] InferenceWorker_p0-w0: min num requests: 1
8
- [2024-08-26 14:41:12,762][93406] Starting all processes...
9
- [2024-08-26 14:41:12,762][93406] Starting process learner_proc0
10
- [2024-08-26 14:41:13,651][93406] Starting all processes...
11
- [2024-08-26 14:41:13,653][93536] Using GPUs [0] for process 0 (actually maps to GPUs [0])
12
- [2024-08-26 14:41:13,653][93536] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
13
- [2024-08-26 14:41:13,660][93406] Starting process inference_proc0-0
14
- [2024-08-26 14:41:13,660][93406] Starting process rollout_proc0
15
- [2024-08-26 14:41:13,661][93406] Starting process rollout_proc1
16
- [2024-08-26 14:41:13,661][93406] Starting process rollout_proc2
17
- [2024-08-26 14:41:13,661][93406] Starting process rollout_proc3
18
- [2024-08-26 14:41:13,702][93536] Num visible devices: 1
19
- [2024-08-26 14:41:13,794][93536] Starting seed is not provided
20
- [2024-08-26 14:41:13,794][93536] Using GPUs [0] for process 0 (actually maps to GPUs [0])
21
- [2024-08-26 14:41:13,794][93536] Initializing actor-critic model on device cuda:0
22
- [2024-08-26 14:41:13,794][93536] RunningMeanStd input shape: (3, 72, 128)
23
- [2024-08-26 14:41:13,798][93536] RunningMeanStd input shape: (1,)
24
- [2024-08-26 14:41:13,805][93536] ConvEncoder: input_channels=3
25
- [2024-08-26 14:41:13,897][93536] Conv encoder output size: 512
26
- [2024-08-26 14:41:13,897][93536] Policy head output size: 512
27
- [2024-08-26 14:41:13,912][93536] Created Actor Critic model with architecture:
28
- [2024-08-26 14:41:13,912][93536] ActorCriticSharedWeights(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  (obs_normalizer): ObservationNormalizer(
30
  (running_mean_std): RunningMeanStdDictInPlace(
31
  (running_mean_std): ModuleDict(
@@ -66,196 +97,707 @@
66
  (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
67
  )
68
  )
69
- [2024-08-26 14:41:14,135][93536] Using optimizer <class 'torch.optim.adam.Adam'>
70
- [2024-08-26 14:41:14,459][93589] Worker 3 uses CPU cores [24, 25, 26, 27, 28, 29, 30, 31]
71
- [2024-08-26 14:41:14,463][93588] Worker 2 uses CPU cores [16, 17, 18, 19, 20, 21, 22, 23]
72
- [2024-08-26 14:41:14,464][93586] Using GPUs [0] for process 0 (actually maps to GPUs [0])
73
- [2024-08-26 14:41:14,465][93586] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
74
- [2024-08-26 14:41:14,469][93587] Worker 1 uses CPU cores [8, 9, 10, 11, 12, 13, 14, 15]
75
- [2024-08-26 14:41:14,480][93586] Num visible devices: 1
76
- [2024-08-26 14:41:14,484][93585] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7]
77
- [2024-08-26 14:41:14,601][93536] No checkpoints found
78
- [2024-08-26 14:41:14,601][93536] Did not load from checkpoint, starting from scratch!
79
- [2024-08-26 14:41:14,601][93536] Initialized policy 0 weights for model version 0
80
- [2024-08-26 14:41:14,605][93536] LearnerWorker_p0 finished initialization!
81
- [2024-08-26 14:41:14,605][93536] Using GPUs [0] for process 0 (actually maps to GPUs [0])
82
- [2024-08-26 14:41:14,700][93586] RunningMeanStd input shape: (3, 72, 128)
83
- [2024-08-26 14:41:14,700][93586] RunningMeanStd input shape: (1,)
84
- [2024-08-26 14:41:14,705][93586] ConvEncoder: input_channels=3
85
- [2024-08-26 14:41:14,745][93586] Conv encoder output size: 512
86
- [2024-08-26 14:41:14,745][93586] Policy head output size: 512
87
- [2024-08-26 14:41:14,772][93406] Inference worker 0-0 is ready!
88
- [2024-08-26 14:41:14,772][93406] All inference workers are ready! Signal rollout workers to start!
89
- [2024-08-26 14:41:14,785][93589] Doom resolution: 160x120, resize resolution: (128, 72)
90
- [2024-08-26 14:41:14,785][93588] Doom resolution: 160x120, resize resolution: (128, 72)
91
- [2024-08-26 14:41:14,786][93585] Doom resolution: 160x120, resize resolution: (128, 72)
92
- [2024-08-26 14:41:14,786][93587] Doom resolution: 160x120, resize resolution: (128, 72)
93
- [2024-08-26 14:41:14,993][93588] Decorrelating experience for 0 frames...
94
- [2024-08-26 14:41:14,993][93587] Decorrelating experience for 0 frames...
95
- [2024-08-26 14:41:15,109][93588] Decorrelating experience for 32 frames...
96
- [2024-08-26 14:41:15,266][93587] Decorrelating experience for 32 frames...
97
- [2024-08-26 14:41:16,129][93406] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
98
- [2024-08-26 14:41:16,129][93406] Avg episode reward: [(0, '4.300')]
99
- [2024-08-26 14:41:16,959][93536] Signal inference workers to stop experience collection...
100
- [2024-08-26 14:41:16,962][93586] InferenceWorker_p0-w0: stopping experience collection
101
- [2024-08-26 14:41:17,111][93536] Signal inference workers to resume experience collection...
102
- [2024-08-26 14:41:17,111][93586] InferenceWorker_p0-w0: resuming experience collection
103
- [2024-08-26 14:41:21,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 28672. Throughput: 0: 1516.0. Samples: 7580. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
104
- [2024-08-26 14:41:21,129][93406] Avg episode reward: [(0, '4.681')]
105
- [2024-08-26 14:41:22,689][93586] Updated weights for policy 0, policy_version 10 (0.0083)
106
- [2024-08-26 14:41:26,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 57344. Throughput: 0: 1189.3. Samples: 11893. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
107
- [2024-08-26 14:41:26,129][93406] Avg episode reward: [(0, '4.525')]
108
- [2024-08-26 14:41:29,932][93586] Updated weights for policy 0, policy_version 20 (0.0007)
109
- [2024-08-26 14:41:31,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 86016. Throughput: 0: 1353.1. Samples: 20296. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
110
- [2024-08-26 14:41:31,129][93406] Avg episode reward: [(0, '4.472')]
111
- [2024-08-26 14:41:32,749][93406] Heartbeat connected on Batcher_0
112
- [2024-08-26 14:41:32,752][93406] Heartbeat connected on LearnerWorker_p0
113
- [2024-08-26 14:41:32,757][93406] Heartbeat connected on InferenceWorker_p0-w0
114
- [2024-08-26 14:41:32,759][93406] Heartbeat connected on RolloutWorker_w1
115
- [2024-08-26 14:41:32,760][93406] Heartbeat connected on RolloutWorker_w2
116
- [2024-08-26 14:41:36,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 114688. Throughput: 0: 1447.3. Samples: 28946. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
117
- [2024-08-26 14:41:36,129][93406] Avg episode reward: [(0, '4.364')]
118
- [2024-08-26 14:41:36,130][93536] Saving new best policy, reward=4.364!
119
- [2024-08-26 14:41:36,967][93586] Updated weights for policy 0, policy_version 30 (0.0006)
120
- [2024-08-26 14:41:41,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 143360. Throughput: 0: 1320.1. Samples: 33002. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
121
- [2024-08-26 14:41:41,129][93406] Avg episode reward: [(0, '4.402')]
122
- [2024-08-26 14:41:41,131][93536] Saving new best policy, reward=4.402!
123
- [2024-08-26 14:41:44,663][93586] Updated weights for policy 0, policy_version 40 (0.0006)
124
- [2024-08-26 14:41:46,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 172032. Throughput: 0: 1372.6. Samples: 41179. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
125
- [2024-08-26 14:41:46,129][93406] Avg episode reward: [(0, '4.472')]
126
- [2024-08-26 14:41:46,129][93536] Saving new best policy, reward=4.472!
127
- [2024-08-26 14:41:51,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5617.4, 300 sec: 5617.4). Total num frames: 196608. Throughput: 0: 1419.0. Samples: 49665. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
128
- [2024-08-26 14:41:51,129][93406] Avg episode reward: [(0, '4.387')]
129
- [2024-08-26 14:41:52,166][93586] Updated weights for policy 0, policy_version 50 (0.0006)
130
- [2024-08-26 14:41:56,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5632.0, 300 sec: 5632.0). Total num frames: 225280. Throughput: 0: 1333.3. Samples: 53331. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
131
- [2024-08-26 14:41:56,129][93406] Avg episode reward: [(0, '4.295')]
132
- [2024-08-26 14:41:59,640][93586] Updated weights for policy 0, policy_version 60 (0.0006)
133
- [2024-08-26 14:42:01,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5552.4, 300 sec: 5552.4). Total num frames: 249856. Throughput: 0: 1369.6. Samples: 61633. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
134
- [2024-08-26 14:42:01,129][93406] Avg episode reward: [(0, '4.543')]
135
- [2024-08-26 14:42:01,131][93536] Saving new best policy, reward=4.543!
136
- [2024-08-26 14:42:06,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5570.6, 300 sec: 5570.6). Total num frames: 278528. Throughput: 0: 1376.0. Samples: 69502. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
137
- [2024-08-26 14:42:06,129][93406] Avg episode reward: [(0, '4.214')]
138
- [2024-08-26 14:42:07,293][93586] Updated weights for policy 0, policy_version 70 (0.0006)
139
- [2024-08-26 14:42:11,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5511.0, 300 sec: 5511.0). Total num frames: 303104. Throughput: 0: 1374.7. Samples: 73753. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
140
- [2024-08-26 14:42:11,129][93406] Avg episode reward: [(0, '4.227')]
141
- [2024-08-26 14:42:14,772][93586] Updated weights for policy 0, policy_version 80 (0.0006)
142
- [2024-08-26 14:42:16,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5529.6, 300 sec: 5529.6). Total num frames: 331776. Throughput: 0: 1370.7. Samples: 81976. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
143
- [2024-08-26 14:42:16,129][93406] Avg episode reward: [(0, '4.289')]
144
- [2024-08-26 14:42:21,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5529.6, 300 sec: 5545.4). Total num frames: 360448. Throughput: 0: 1361.9. Samples: 90230. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
145
- [2024-08-26 14:42:21,129][93406] Avg episode reward: [(0, '4.455')]
146
- [2024-08-26 14:42:22,192][93586] Updated weights for policy 0, policy_version 90 (0.0007)
147
- [2024-08-26 14:42:26,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5461.3, 300 sec: 5500.3). Total num frames: 385024. Throughput: 0: 1362.3. Samples: 94306. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
148
- [2024-08-26 14:42:26,129][93406] Avg episode reward: [(0, '4.460')]
149
- [2024-08-26 14:42:29,877][93586] Updated weights for policy 0, policy_version 100 (0.0007)
150
- [2024-08-26 14:42:31,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5461.3, 300 sec: 5515.9). Total num frames: 413696. Throughput: 0: 1359.2. Samples: 102341. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
151
- [2024-08-26 14:42:31,129][93406] Avg episode reward: [(0, '4.604')]
152
- [2024-08-26 14:42:31,131][93536] Saving new best policy, reward=4.604!
153
- [2024-08-26 14:42:36,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5529.6). Total num frames: 442368. Throughput: 0: 1350.6. Samples: 110440. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
154
- [2024-08-26 14:42:36,129][93406] Avg episode reward: [(0, '4.530')]
155
- [2024-08-26 14:42:37,622][93586] Updated weights for policy 0, policy_version 110 (0.0005)
156
- [2024-08-26 14:42:41,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5493.5). Total num frames: 466944. Throughput: 0: 1359.6. Samples: 114511. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
157
- [2024-08-26 14:42:41,129][93406] Avg episode reward: [(0, '4.406')]
158
- [2024-08-26 14:42:45,015][93586] Updated weights for policy 0, policy_version 120 (0.0006)
159
- [2024-08-26 14:42:46,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5506.8). Total num frames: 495616. Throughput: 0: 1354.6. Samples: 122591. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
160
- [2024-08-26 14:42:46,129][93406] Avg episode reward: [(0, '4.294')]
161
- [2024-08-26 14:42:51,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5518.8). Total num frames: 524288. Throughput: 0: 1365.8. Samples: 130963. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
162
- [2024-08-26 14:42:51,129][93406] Avg episode reward: [(0, '4.449')]
163
- [2024-08-26 14:42:52,340][93586] Updated weights for policy 0, policy_version 130 (0.0006)
164
- [2024-08-26 14:42:56,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5488.6). Total num frames: 548864. Throughput: 0: 1366.5. Samples: 135244. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
165
- [2024-08-26 14:42:56,129][93406] Avg episode reward: [(0, '4.447')]
166
- [2024-08-26 14:43:00,086][93586] Updated weights for policy 0, policy_version 140 (0.0006)
167
- [2024-08-26 14:43:01,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5461.3, 300 sec: 5500.3). Total num frames: 577536. Throughput: 0: 1355.5. Samples: 142972. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
168
- [2024-08-26 14:43:01,129][93406] Avg episode reward: [(0, '4.487')]
169
- [2024-08-26 14:43:06,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5473.7). Total num frames: 602112. Throughput: 0: 1352.6. Samples: 151096. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
170
- [2024-08-26 14:43:06,129][93406] Avg episode reward: [(0, '4.290')]
171
- [2024-08-26 14:43:07,647][93586] Updated weights for policy 0, policy_version 150 (0.0006)
172
- [2024-08-26 14:43:11,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5461.3, 300 sec: 5485.1). Total num frames: 630784. Throughput: 0: 1354.0. Samples: 155234. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
173
- [2024-08-26 14:43:11,129][93406] Avg episode reward: [(0, '4.314')]
174
- [2024-08-26 14:43:11,131][93536] Saving /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/checkpoint_p0/checkpoint_000000154_630784.pth...
175
- [2024-08-26 14:43:15,219][93586] Updated weights for policy 0, policy_version 160 (0.0007)
176
- [2024-08-26 14:43:16,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5461.3). Total num frames: 655360. Throughput: 0: 1354.4. Samples: 163287. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
177
- [2024-08-26 14:43:16,129][93406] Avg episode reward: [(0, '4.431')]
178
- [2024-08-26 14:43:21,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5472.3). Total num frames: 684032. Throughput: 0: 1346.3. Samples: 171023. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
179
- [2024-08-26 14:43:21,129][93406] Avg episode reward: [(0, '4.525')]
180
- [2024-08-26 14:43:23,245][93586] Updated weights for policy 0, policy_version 170 (0.0007)
181
- [2024-08-26 14:43:26,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5450.8). Total num frames: 708608. Throughput: 0: 1342.8. Samples: 174938. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
182
- [2024-08-26 14:43:26,129][93406] Avg episode reward: [(0, '4.382')]
183
- [2024-08-26 14:43:31,040][93586] Updated weights for policy 0, policy_version 180 (0.0006)
184
- [2024-08-26 14:43:31,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5461.3). Total num frames: 737280. Throughput: 0: 1344.3. Samples: 183086. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
185
- [2024-08-26 14:43:31,129][93406] Avg episode reward: [(0, '4.468')]
186
- [2024-08-26 14:43:36,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5324.8, 300 sec: 5441.8). Total num frames: 761856. Throughput: 0: 1333.8. Samples: 190986. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
187
- [2024-08-26 14:43:36,129][93406] Avg episode reward: [(0, '4.216')]
188
- [2024-08-26 14:43:38,609][93586] Updated weights for policy 0, policy_version 190 (0.0006)
189
- [2024-08-26 14:43:41,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5451.9). Total num frames: 790528. Throughput: 0: 1324.2. Samples: 194832. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
190
- [2024-08-26 14:43:41,129][93406] Avg episode reward: [(0, '4.313')]
191
- [2024-08-26 14:43:45,962][93586] Updated weights for policy 0, policy_version 200 (0.0006)
192
- [2024-08-26 14:43:46,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 5461.3). Total num frames: 819200. Throughput: 0: 1338.5. Samples: 203206. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
193
- [2024-08-26 14:43:46,129][93406] Avg episode reward: [(0, '4.373')]
194
- [2024-08-26 14:43:51,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5324.8, 300 sec: 5443.7). Total num frames: 843776. Throughput: 0: 1340.2. Samples: 211406. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
195
- [2024-08-26 14:43:51,129][93406] Avg episode reward: [(0, '4.403')]
196
- [2024-08-26 14:43:53,431][93586] Updated weights for policy 0, policy_version 210 (0.0004)
197
- [2024-08-26 14:43:56,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5452.8). Total num frames: 872448. Throughput: 0: 1341.2. Samples: 215586. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
198
- [2024-08-26 14:43:56,129][93406] Avg episode reward: [(0, '4.584')]
199
- [2024-08-26 14:44:01,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5324.8, 300 sec: 5436.5). Total num frames: 897024. Throughput: 0: 1336.3. Samples: 223422. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
200
- [2024-08-26 14:44:01,129][93406] Avg episode reward: [(0, '4.534')]
201
- [2024-08-26 14:44:01,329][93586] Updated weights for policy 0, policy_version 220 (0.0006)
202
- [2024-08-26 14:44:06,129][93406] Fps is (10 sec: 5324.8, 60 sec: 5393.1, 300 sec: 5445.3). Total num frames: 925696. Throughput: 0: 1344.3. Samples: 231516. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
203
- [2024-08-26 14:44:06,129][93406] Avg episode reward: [(0, '4.358')]
204
- [2024-08-26 14:44:08,638][93586] Updated weights for policy 0, policy_version 230 (0.0006)
205
- [2024-08-26 14:44:11,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 5453.5). Total num frames: 954368. Throughput: 0: 1352.2. Samples: 235785. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
206
- [2024-08-26 14:44:11,129][93406] Avg episode reward: [(0, '4.460')]
207
- [2024-08-26 14:44:16,104][93586] Updated weights for policy 0, policy_version 240 (0.0006)
208
- [2024-08-26 14:44:16,129][93406] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5461.3). Total num frames: 983040. Throughput: 0: 1351.9. Samples: 243923. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
209
- [2024-08-26 14:44:16,129][93406] Avg episode reward: [(0, '4.338')]
210
- [2024-08-26 14:44:20,555][93536] Saving /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/checkpoint_p0/checkpoint_000000246_1007616.pth...
211
- [2024-08-26 14:44:20,556][93406] Component Batcher_0 stopped!
212
- [2024-08-26 14:44:20,556][93406] Component RolloutWorker_w0 process died already! Don't wait for it.
213
- [2024-08-26 14:44:20,556][93406] Component RolloutWorker_w3 process died already! Don't wait for it.
214
- [2024-08-26 14:44:20,557][93536] Stopping Batcher_0...
215
- [2024-08-26 14:44:20,557][93536] Loop batcher_evt_loop terminating...
216
- [2024-08-26 14:44:20,566][93586] Weights refcount: 2 0
217
- [2024-08-26 14:44:20,567][93586] Stopping InferenceWorker_p0-w0...
218
- [2024-08-26 14:44:20,567][93586] Loop inference_proc0-0_evt_loop terminating...
219
- [2024-08-26 14:44:20,567][93406] Component InferenceWorker_p0-w0 stopped!
220
- [2024-08-26 14:44:20,571][93406] Component RolloutWorker_w1 stopped!
221
- [2024-08-26 14:44:20,571][93587] Stopping RolloutWorker_w1...
222
- [2024-08-26 14:44:20,571][93588] Stopping RolloutWorker_w2...
223
- [2024-08-26 14:44:20,571][93406] Component RolloutWorker_w2 stopped!
224
- [2024-08-26 14:44:20,571][93588] Loop rollout_proc2_evt_loop terminating...
225
- [2024-08-26 14:44:20,571][93587] Loop rollout_proc1_evt_loop terminating...
226
- [2024-08-26 14:44:20,580][93536] Saving /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/checkpoint_p0/checkpoint_000000246_1007616.pth...
227
- [2024-08-26 14:44:20,612][93536] Stopping LearnerWorker_p0...
228
- [2024-08-26 14:44:20,612][93406] Component LearnerWorker_p0 stopped!
229
- [2024-08-26 14:44:20,612][93536] Loop learner_proc0_evt_loop terminating...
230
- [2024-08-26 14:44:20,612][93406] Waiting for process learner_proc0 to stop...
231
- [2024-08-26 14:44:20,970][93406] Waiting for process inference_proc0-0 to join...
232
- [2024-08-26 14:44:20,970][93406] Waiting for process rollout_proc0 to join...
233
- [2024-08-26 14:44:20,971][93406] Waiting for process rollout_proc1 to join...
234
- [2024-08-26 14:44:20,971][93406] Waiting for process rollout_proc2 to join...
235
- [2024-08-26 14:44:20,971][93406] Waiting for process rollout_proc3 to join...
236
- [2024-08-26 14:44:20,971][93406] Batcher 0 profile tree view:
237
- batching: 1.5273, releasing_batches: 0.0036
238
- [2024-08-26 14:44:20,971][93406] InferenceWorker_p0-w0 profile tree view:
239
- wait_policy: 0.0000
240
- wait_policy_total: 1.2098
241
- update_model: 1.4837
242
- weight_update: 0.0006
243
- one_step: 0.0018
244
- handle_policy_step: 176.9933
245
- deserialize: 2.2360, stack: 0.5673, obs_to_device_normalize: 38.1132, forward: 96.0858, send_messages: 6.6587
246
- prepare_outputs: 28.4805
247
- to_cpu: 22.9205
248
- [2024-08-26 14:44:20,971][93406] Learner 0 profile tree view:
249
- misc: 0.0006, prepare_batch: 5.5755
250
- train: 9.9650
251
- epoch_init: 0.0008, minibatch_init: 0.0007, losses_postprocess: 0.0871, kl_divergence: 0.0895, after_optimizer: 4.7762
252
- calculate_losses: 3.8292
253
- losses_init: 0.0004, forward_head: 0.1671, bptt_initial: 3.0219, tail: 0.0856, advantages_returns: 0.0212, losses: 0.3656
254
- bptt: 0.1487
255
- bptt_forward_core: 0.1425
256
- update: 1.1345
257
- clip: 0.1040
258
- [2024-08-26 14:44:20,971][93406] Loop Runner_EvtLoop terminating...
259
- [2024-08-26 14:44:20,971][93406] Runner profile tree view:
260
- main_loop: 188.2098
261
- [2024-08-26 14:44:20,971][93406] Collected {0: 1007616}, FPS: 5353.7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2024-08-26 14:51:14,698][98398] Saving configuration to /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/config.json...
2
+ [2024-08-26 14:51:14,698][98398] Rollout worker 0 uses device cpu
3
+ [2024-08-26 14:51:14,699][98398] Rollout worker 1 uses device cpu
4
+ [2024-08-26 14:51:14,699][98398] Rollout worker 2 uses device cpu
5
+ [2024-08-26 14:51:14,699][98398] Rollout worker 3 uses device cpu
6
+ [2024-08-26 14:51:14,699][98398] Rollout worker 4 uses device cpu
7
+ [2024-08-26 14:51:14,699][98398] Rollout worker 5 uses device cpu
8
+ [2024-08-26 14:51:14,699][98398] Rollout worker 6 uses device cpu
9
+ [2024-08-26 14:51:14,699][98398] Rollout worker 7 uses device cpu
10
+ [2024-08-26 14:51:14,699][98398] Rollout worker 8 uses device cpu
11
+ [2024-08-26 14:51:14,699][98398] Rollout worker 9 uses device cpu
12
+ [2024-08-26 14:51:14,699][98398] Rollout worker 10 uses device cpu
13
+ [2024-08-26 14:51:14,699][98398] Rollout worker 11 uses device cpu
14
+ [2024-08-26 14:51:14,699][98398] Rollout worker 12 uses device cpu
15
+ [2024-08-26 14:51:14,699][98398] Rollout worker 13 uses device cpu
16
+ [2024-08-26 14:51:14,699][98398] Rollout worker 14 uses device cpu
17
+ [2024-08-26 14:51:14,699][98398] Rollout worker 15 uses device cpu
18
+ [2024-08-26 14:51:14,699][98398] Rollout worker 16 uses device cpu
19
+ [2024-08-26 14:51:14,699][98398] Rollout worker 17 uses device cpu
20
+ [2024-08-26 14:51:14,699][98398] Rollout worker 18 uses device cpu
21
+ [2024-08-26 14:51:14,699][98398] Rollout worker 19 uses device cpu
22
+ [2024-08-26 14:51:14,699][98398] Rollout worker 20 uses device cpu
23
+ [2024-08-26 14:51:14,699][98398] Rollout worker 21 uses device cpu
24
+ [2024-08-26 14:51:14,699][98398] Rollout worker 22 uses device cpu
25
+ [2024-08-26 14:51:14,700][98398] Rollout worker 23 uses device cpu
26
+ [2024-08-26 14:51:14,813][98398] Using GPUs [0] for process 0 (actually maps to GPUs [0])
27
+ [2024-08-26 14:51:14,813][98398] InferenceWorker_p0-w0: min num requests: 8
28
+ [2024-08-26 14:51:14,846][98398] Starting all processes...
29
+ [2024-08-26 14:51:14,846][98398] Starting process learner_proc0
30
+ [2024-08-26 14:51:15,765][98398] Starting all processes...
31
+ [2024-08-26 14:51:15,767][98522] Using GPUs [0] for process 0 (actually maps to GPUs [0])
32
+ [2024-08-26 14:51:15,767][98522] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
33
+ [2024-08-26 14:51:15,775][98398] Starting process inference_proc0-0
34
+ [2024-08-26 14:51:15,776][98398] Starting process rollout_proc0
35
+ [2024-08-26 14:51:15,776][98398] Starting process rollout_proc1
36
+ [2024-08-26 14:51:15,776][98398] Starting process rollout_proc2
37
+ [2024-08-26 14:51:15,778][98398] Starting process rollout_proc3
38
+ [2024-08-26 14:51:15,780][98398] Starting process rollout_proc4
39
+ [2024-08-26 14:51:15,782][98398] Starting process rollout_proc5
40
+ [2024-08-26 14:51:15,784][98398] Starting process rollout_proc6
41
+ [2024-08-26 14:51:15,785][98398] Starting process rollout_proc7
42
+ [2024-08-26 14:51:15,788][98398] Starting process rollout_proc8
43
+ [2024-08-26 14:51:15,793][98398] Starting process rollout_proc9
44
+ [2024-08-26 14:51:15,795][98398] Starting process rollout_proc10
45
+ [2024-08-26 14:51:15,795][98398] Starting process rollout_proc11
46
+ [2024-08-26 14:51:15,797][98398] Starting process rollout_proc12
47
+ [2024-08-26 14:51:15,797][98398] Starting process rollout_proc13
48
+ [2024-08-26 14:51:15,797][98398] Starting process rollout_proc14
49
+ [2024-08-26 14:51:15,820][98522] Num visible devices: 1
50
+ [2024-08-26 14:51:15,985][98522] Starting seed is not provided
51
+ [2024-08-26 14:51:15,985][98522] Using GPUs [0] for process 0 (actually maps to GPUs [0])
52
+ [2024-08-26 14:51:15,985][98522] Initializing actor-critic model on device cuda:0
53
+ [2024-08-26 14:51:15,986][98522] RunningMeanStd input shape: (3, 72, 128)
54
+ [2024-08-26 14:51:15,992][98522] RunningMeanStd input shape: (1,)
55
+ [2024-08-26 14:51:15,999][98522] ConvEncoder: input_channels=3
56
+ [2024-08-26 14:51:16,205][98522] Conv encoder output size: 512
57
+ [2024-08-26 14:51:16,205][98522] Policy head output size: 512
58
+ [2024-08-26 14:51:16,232][98522] Created Actor Critic model with architecture:
59
+ [2024-08-26 14:51:16,233][98522] ActorCriticSharedWeights(
60
  (obs_normalizer): ObservationNormalizer(
61
  (running_mean_std): RunningMeanStdDictInPlace(
62
  (running_mean_std): ModuleDict(
 
97
  (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
98
  )
99
  )
100
+ [2024-08-26 14:51:16,485][98522] Using optimizer <class 'torch.optim.adam.Adam'>
101
+ [2024-08-26 14:51:17,021][98398] Starting process rollout_proc15
102
+ [2024-08-26 14:51:17,025][98579] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
103
+ [2024-08-26 14:51:17,039][98398] Starting process rollout_proc16
104
+ [2024-08-26 14:51:17,042][98578] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
105
+ [2024-08-26 14:51:17,086][98398] Starting process rollout_proc17
106
+ [2024-08-26 14:51:17,097][98581] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
107
+ [2024-08-26 14:51:17,107][98398] Starting process rollout_proc18
108
+ [2024-08-26 14:51:17,108][98398] Starting process rollout_proc19
109
+ [2024-08-26 14:51:17,115][98398] Starting process rollout_proc20
110
+ [2024-08-26 14:51:17,120][98589] Worker 12 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
111
+ [2024-08-26 14:51:17,127][98580] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
112
+ [2024-08-26 14:51:17,135][98398] Starting process rollout_proc21
113
+ [2024-08-26 14:51:17,152][98398] Starting process rollout_proc22
114
+ [2024-08-26 14:51:17,152][98577] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
115
+ [2024-08-26 14:51:17,154][98398] Starting process rollout_proc23
116
+ [2024-08-26 14:51:17,157][98585] Worker 8 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
117
+ [2024-08-26 14:51:17,176][98597] Worker 13 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
118
+ [2024-08-26 14:51:17,177][98584] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
119
+ [2024-08-26 14:51:17,180][98605] Worker 14 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
120
+ [2024-08-26 14:51:17,182][98576] Using GPUs [0] for process 0 (actually maps to GPUs [0])
121
+ [2024-08-26 14:51:17,182][98576] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
122
+ [2024-08-26 14:51:17,199][98576] Num visible devices: 1
123
+ [2024-08-26 14:51:17,236][98587] Worker 10 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
124
+ [2024-08-26 14:51:17,246][98583] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
125
+ [2024-08-26 14:51:17,277][98582] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
126
+ [2024-08-26 14:51:17,360][98586] Worker 9 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
127
+ [2024-08-26 14:51:17,401][98522] No checkpoints found
128
+ [2024-08-26 14:51:17,401][98522] Did not load from checkpoint, starting from scratch!
129
+ [2024-08-26 14:51:17,401][98522] Initialized policy 0 weights for model version 0
130
+ [2024-08-26 14:51:17,402][98588] Worker 11 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
131
+ [2024-08-26 14:51:17,405][98522] Using GPUs [0] for process 0 (actually maps to GPUs [0])
132
+ [2024-08-26 14:51:17,412][98522] LearnerWorker_p0 finished initialization!
133
+ [2024-08-26 14:51:17,645][98576] RunningMeanStd input shape: (3, 72, 128)
134
+ [2024-08-26 14:51:17,645][98576] RunningMeanStd input shape: (1,)
135
+ [2024-08-26 14:51:17,650][98576] ConvEncoder: input_channels=3
136
+ [2024-08-26 14:51:17,696][98576] Conv encoder output size: 512
137
+ [2024-08-26 14:51:17,696][98576] Policy head output size: 512
138
+ [2024-08-26 14:51:18,110][99128] Worker 15 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
139
+ [2024-08-26 14:51:18,193][99354] Worker 23 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
140
+ [2024-08-26 14:51:18,195][99289] Worker 20 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
141
+ [2024-08-26 14:51:18,222][99263] Worker 19 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
142
+ [2024-08-26 14:51:18,225][99160] Worker 16 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
143
+ [2024-08-26 14:51:18,231][99265] Worker 18 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
144
+ [2024-08-26 14:51:18,236][99254] Worker 17 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
145
+ [2024-08-26 14:51:18,237][99353] Worker 22 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
146
+ [2024-08-26 14:51:18,250][98398] Inference worker 0-0 is ready!
147
+ [2024-08-26 14:51:18,250][98398] All inference workers are ready! Signal rollout workers to start!
148
+ [2024-08-26 14:51:18,250][98398] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
149
+ [2024-08-26 14:51:18,251][99322] Worker 21 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
150
+ [2024-08-26 14:51:18,318][98605] Doom resolution: 160x120, resize resolution: (128, 72)
151
+ [2024-08-26 14:51:18,326][98578] Doom resolution: 160x120, resize resolution: (128, 72)
152
+ [2024-08-26 14:51:18,326][98584] Doom resolution: 160x120, resize resolution: (128, 72)
153
+ [2024-08-26 14:51:18,317][98588] Doom resolution: 160x120, resize resolution: (128, 72)
154
+ [2024-08-26 14:51:18,322][98587] Doom resolution: 160x120, resize resolution: (128, 72)
155
+ [2024-08-26 14:51:18,328][99289] Doom resolution: 160x120, resize resolution: (128, 72)
156
+ [2024-08-26 14:51:18,328][98586] Doom resolution: 160x120, resize resolution: (128, 72)
157
+ [2024-08-26 14:51:18,329][98580] Doom resolution: 160x120, resize resolution: (128, 72)
158
+ [2024-08-26 14:51:18,329][98597] Doom resolution: 160x120, resize resolution: (128, 72)
159
+ [2024-08-26 14:51:18,329][99354] Doom resolution: 160x120, resize resolution: (128, 72)
160
+ [2024-08-26 14:51:18,331][98583] Doom resolution: 160x120, resize resolution: (128, 72)
161
+ [2024-08-26 14:51:18,331][98589] Doom resolution: 160x120, resize resolution: (128, 72)
162
+ [2024-08-26 14:51:18,332][98577] Doom resolution: 160x120, resize resolution: (128, 72)
163
+ [2024-08-26 14:51:18,335][99263] Doom resolution: 160x120, resize resolution: (128, 72)
164
+ [2024-08-26 14:51:18,339][99128] Doom resolution: 160x120, resize resolution: (128, 72)
165
+ [2024-08-26 14:51:18,340][98579] Doom resolution: 160x120, resize resolution: (128, 72)
166
+ [2024-08-26 14:51:18,341][98581] Doom resolution: 160x120, resize resolution: (128, 72)
167
+ [2024-08-26 14:51:18,342][98582] Doom resolution: 160x120, resize resolution: (128, 72)
168
+ [2024-08-26 14:51:18,349][98585] Doom resolution: 160x120, resize resolution: (128, 72)
169
+ [2024-08-26 14:51:18,360][99322] Doom resolution: 160x120, resize resolution: (128, 72)
170
+ [2024-08-26 14:51:18,365][99265] Doom resolution: 160x120, resize resolution: (128, 72)
171
+ [2024-08-26 14:51:18,371][99353] Doom resolution: 160x120, resize resolution: (128, 72)
172
+ [2024-08-26 14:51:18,379][99254] Doom resolution: 160x120, resize resolution: (128, 72)
173
+ [2024-08-26 14:51:18,382][99160] Doom resolution: 160x120, resize resolution: (128, 72)
174
+ [2024-08-26 14:51:18,705][98577] Decorrelating experience for 0 frames...
175
+ [2024-08-26 14:51:18,705][99322] Decorrelating experience for 0 frames...
176
+ [2024-08-26 14:51:18,705][98580] Decorrelating experience for 0 frames...
177
+ [2024-08-26 14:51:18,705][98581] Decorrelating experience for 0 frames...
178
+ [2024-08-26 14:51:18,705][98586] Decorrelating experience for 0 frames...
179
+ [2024-08-26 14:51:18,707][98579] Decorrelating experience for 0 frames...
180
+ [2024-08-26 14:51:18,707][98605] Decorrelating experience for 0 frames...
181
+ [2024-08-26 14:51:18,707][98587] Decorrelating experience for 0 frames...
182
+ [2024-08-26 14:51:18,708][99265] Decorrelating experience for 0 frames...
183
+ [2024-08-26 14:51:18,843][98577] Decorrelating experience for 32 frames...
184
+ [2024-08-26 14:51:18,843][98581] Decorrelating experience for 32 frames...
185
+ [2024-08-26 14:51:18,846][98587] Decorrelating experience for 32 frames...
186
+ [2024-08-26 14:51:18,846][98584] Decorrelating experience for 0 frames...
187
+ [2024-08-26 14:51:18,847][99322] Decorrelating experience for 32 frames...
188
+ [2024-08-26 14:51:18,850][98580] Decorrelating experience for 32 frames...
189
+ [2024-08-26 14:51:18,851][98579] Decorrelating experience for 32 frames...
190
+ [2024-08-26 14:51:18,854][99263] Decorrelating experience for 0 frames...
191
+ [2024-08-26 14:51:18,888][98583] Decorrelating experience for 0 frames...
192
+ [2024-08-26 14:51:18,981][98584] Decorrelating experience for 32 frames...
193
+ [2024-08-26 14:51:18,981][98586] Decorrelating experience for 32 frames...
194
+ [2024-08-26 14:51:18,986][98581] Decorrelating experience for 64 frames...
195
+ [2024-08-26 14:51:18,986][99263] Decorrelating experience for 32 frames...
196
+ [2024-08-26 14:51:18,988][99322] Decorrelating experience for 64 frames...
197
+ [2024-08-26 14:51:18,988][98605] Decorrelating experience for 32 frames...
198
+ [2024-08-26 14:51:19,002][98578] Decorrelating experience for 0 frames...
199
+ [2024-08-26 14:51:19,006][99289] Decorrelating experience for 0 frames...
200
+ [2024-08-26 14:51:19,046][98587] Decorrelating experience for 64 frames...
201
+ [2024-08-26 14:51:19,122][98583] Decorrelating experience for 32 frames...
202
+ [2024-08-26 14:51:19,122][98588] Decorrelating experience for 0 frames...
203
+ [2024-08-26 14:51:19,128][99353] Decorrelating experience for 0 frames...
204
+ [2024-08-26 14:51:19,135][98577] Decorrelating experience for 64 frames...
205
+ [2024-08-26 14:51:19,136][99265] Decorrelating experience for 32 frames...
206
+ [2024-08-26 14:51:19,142][99160] Decorrelating experience for 0 frames...
207
+ [2024-08-26 14:51:19,149][98581] Decorrelating experience for 96 frames...
208
+ [2024-08-26 14:51:19,194][98578] Decorrelating experience for 32 frames...
209
+ [2024-08-26 14:51:19,258][98583] Decorrelating experience for 64 frames...
210
+ [2024-08-26 14:51:19,258][98582] Decorrelating experience for 0 frames...
211
+ [2024-08-26 14:51:19,259][99160] Decorrelating experience for 32 frames...
212
+ [2024-08-26 14:51:19,260][99128] Decorrelating experience for 0 frames...
213
+ [2024-08-26 14:51:19,277][98589] Decorrelating experience for 0 frames...
214
+ [2024-08-26 14:51:19,279][99263] Decorrelating experience for 64 frames...
215
+ [2024-08-26 14:51:19,288][98586] Decorrelating experience for 64 frames...
216
+ [2024-08-26 14:51:19,330][98578] Decorrelating experience for 64 frames...
217
+ [2024-08-26 14:51:19,345][98580] Decorrelating experience for 64 frames...
218
+ [2024-08-26 14:51:19,397][98579] Decorrelating experience for 64 frames...
219
+ [2024-08-26 14:51:19,397][98597] Decorrelating experience for 0 frames...
220
+ [2024-08-26 14:51:19,399][99354] Decorrelating experience for 0 frames...
221
+ [2024-08-26 14:51:19,403][99353] Decorrelating experience for 32 frames...
222
+ [2024-08-26 14:51:19,425][99263] Decorrelating experience for 96 frames...
223
+ [2024-08-26 14:51:19,428][99254] Decorrelating experience for 0 frames...
224
+ [2024-08-26 14:51:19,434][98586] Decorrelating experience for 96 frames...
225
+ [2024-08-26 14:51:19,480][98578] Decorrelating experience for 96 frames...
226
+ [2024-08-26 14:51:19,516][98581] Decorrelating experience for 128 frames...
227
+ [2024-08-26 14:51:19,525][98597] Decorrelating experience for 32 frames...
228
+ [2024-08-26 14:51:19,545][98579] Decorrelating experience for 96 frames...
229
+ [2024-08-26 14:51:19,545][98587] Decorrelating experience for 96 frames...
230
+ [2024-08-26 14:51:19,563][99353] Decorrelating experience for 64 frames...
231
+ [2024-08-26 14:51:19,588][99128] Decorrelating experience for 32 frames...
232
+ [2024-08-26 14:51:19,597][99289] Decorrelating experience for 32 frames...
233
+ [2024-08-26 14:51:19,650][98585] Decorrelating experience for 0 frames...
234
+ [2024-08-26 14:51:19,663][98577] Decorrelating experience for 96 frames...
235
+ [2024-08-26 14:51:19,668][98583] Decorrelating experience for 96 frames...
236
+ [2024-08-26 14:51:19,683][99354] Decorrelating experience for 32 frames...
237
+ [2024-08-26 14:51:19,698][99263] Decorrelating experience for 128 frames...
238
+ [2024-08-26 14:51:19,708][98578] Decorrelating experience for 128 frames...
239
+ [2024-08-26 14:51:19,727][99128] Decorrelating experience for 64 frames...
240
+ [2024-08-26 14:51:19,758][98580] Decorrelating experience for 96 frames...
241
+ [2024-08-26 14:51:19,781][99289] Decorrelating experience for 64 frames...
242
+ [2024-08-26 14:51:19,781][98584] Decorrelating experience for 64 frames...
243
+ [2024-08-26 14:51:19,820][98581] Decorrelating experience for 160 frames...
244
+ [2024-08-26 14:51:19,833][98597] Decorrelating experience for 64 frames...
245
+ [2024-08-26 14:51:19,856][98577] Decorrelating experience for 128 frames...
246
+ [2024-08-26 14:51:19,861][98586] Decorrelating experience for 128 frames...
247
+ [2024-08-26 14:51:19,884][99128] Decorrelating experience for 96 frames...
248
+ [2024-08-26 14:51:19,913][99322] Decorrelating experience for 96 frames...
249
+ [2024-08-26 14:51:19,914][99254] Decorrelating experience for 32 frames...
250
+ [2024-08-26 14:51:19,933][98580] Decorrelating experience for 128 frames...
251
+ [2024-08-26 14:51:19,972][99354] Decorrelating experience for 64 frames...
252
+ [2024-08-26 14:51:19,982][98587] Decorrelating experience for 128 frames...
253
+ [2024-08-26 14:51:20,001][98583] Decorrelating experience for 128 frames...
254
+ [2024-08-26 14:51:20,004][98588] Decorrelating experience for 32 frames...
255
+ [2024-08-26 14:51:20,039][99353] Decorrelating experience for 96 frames...
256
+ [2024-08-26 14:51:20,049][99289] Decorrelating experience for 96 frames...
257
+ [2024-08-26 14:51:20,055][98577] Decorrelating experience for 160 frames...
258
+ [2024-08-26 14:51:20,062][98579] Decorrelating experience for 128 frames...
259
+ [2024-08-26 14:51:20,061][99254] Decorrelating experience for 64 frames...
260
+ [2024-08-26 14:51:20,098][98578] Decorrelating experience for 160 frames...
261
+ [2024-08-26 14:51:20,114][99128] Decorrelating experience for 128 frames...
262
+ [2024-08-26 14:51:20,120][99354] Decorrelating experience for 96 frames...
263
+ [2024-08-26 14:51:20,126][98581] Decorrelating experience for 192 frames...
264
+ [2024-08-26 14:51:20,183][98605] Decorrelating experience for 64 frames...
265
+ [2024-08-26 14:51:20,185][98585] Decorrelating experience for 32 frames...
266
+ [2024-08-26 14:51:20,221][99353] Decorrelating experience for 128 frames...
267
+ [2024-08-26 14:51:20,247][99289] Decorrelating experience for 128 frames...
268
+ [2024-08-26 14:51:20,255][98597] Decorrelating experience for 96 frames...
269
+ [2024-08-26 14:51:20,259][98579] Decorrelating experience for 160 frames...
270
+ [2024-08-26 14:51:20,259][99160] Decorrelating experience for 64 frames...
271
+ [2024-08-26 14:51:20,280][98587] Decorrelating experience for 160 frames...
272
+ [2024-08-26 14:51:20,304][98588] Decorrelating experience for 64 frames...
273
+ [2024-08-26 14:51:20,335][98581] Decorrelating experience for 224 frames...
274
+ [2024-08-26 14:51:20,335][99322] Decorrelating experience for 128 frames...
275
+ [2024-08-26 14:51:20,350][98583] Decorrelating experience for 160 frames...
276
+ [2024-08-26 14:51:20,366][98577] Decorrelating experience for 192 frames...
277
+ [2024-08-26 14:51:20,405][98582] Decorrelating experience for 32 frames...
278
+ [2024-08-26 14:51:20,413][99128] Decorrelating experience for 160 frames...
279
+ [2024-08-26 14:51:20,423][98580] Decorrelating experience for 160 frames...
280
+ [2024-08-26 14:51:20,432][98605] Decorrelating experience for 96 frames...
281
+ [2024-08-26 14:51:20,468][99160] Decorrelating experience for 96 frames...
282
+ [2024-08-26 14:51:20,485][99289] Decorrelating experience for 160 frames...
283
+ [2024-08-26 14:51:20,509][99353] Decorrelating experience for 160 frames...
284
+ [2024-08-26 14:51:20,562][98589] Decorrelating experience for 32 frames...
285
+ [2024-08-26 14:51:20,563][98588] Decorrelating experience for 96 frames...
286
+ [2024-08-26 14:51:20,566][98578] Decorrelating experience for 192 frames...
287
+ [2024-08-26 14:51:20,567][98581] Decorrelating experience for 256 frames...
288
+ [2024-08-26 14:51:20,588][98577] Decorrelating experience for 224 frames...
289
+ [2024-08-26 14:51:20,604][98605] Decorrelating experience for 128 frames...
290
+ [2024-08-26 14:51:20,635][98587] Decorrelating experience for 192 frames...
291
+ [2024-08-26 14:51:20,667][98579] Decorrelating experience for 192 frames...
292
+ [2024-08-26 14:51:20,696][98582] Decorrelating experience for 64 frames...
293
+ [2024-08-26 14:51:20,706][98589] Decorrelating experience for 64 frames...
294
+ [2024-08-26 14:51:20,707][99160] Decorrelating experience for 128 frames...
295
+ [2024-08-26 14:51:20,720][99254] Decorrelating experience for 96 frames...
296
+ [2024-08-26 14:51:20,722][98580] Decorrelating experience for 192 frames...
297
+ [2024-08-26 14:51:20,738][99265] Decorrelating experience for 64 frames...
298
+ [2024-08-26 14:51:20,802][98588] Decorrelating experience for 128 frames...
299
+ [2024-08-26 14:51:20,822][98581] Decorrelating experience for 288 frames...
300
+ [2024-08-26 14:51:20,829][99353] Decorrelating experience for 192 frames...
301
+ [2024-08-26 14:51:20,830][98577] Decorrelating experience for 256 frames...
302
+ [2024-08-26 14:51:20,838][98585] Decorrelating experience for 64 frames...
303
+ [2024-08-26 14:51:20,845][98578] Decorrelating experience for 224 frames...
304
+ [2024-08-26 14:51:20,860][98589] Decorrelating experience for 96 frames...
305
+ [2024-08-26 14:51:20,860][98587] Decorrelating experience for 224 frames...
306
+ [2024-08-26 14:51:20,864][99128] Decorrelating experience for 192 frames...
307
+ [2024-08-26 14:51:20,874][98597] Decorrelating experience for 128 frames...
308
+ [2024-08-26 14:51:20,944][98580] Decorrelating experience for 224 frames...
309
+ [2024-08-26 14:51:20,982][98588] Decorrelating experience for 160 frames...
310
+ [2024-08-26 14:51:20,988][98579] Decorrelating experience for 224 frames...
311
+ [2024-08-26 14:51:20,989][98605] Decorrelating experience for 160 frames...
312
+ [2024-08-26 14:51:20,995][99160] Decorrelating experience for 160 frames...
313
+ [2024-08-26 14:51:21,005][98585] Decorrelating experience for 96 frames...
314
+ [2024-08-26 14:51:21,059][99353] Decorrelating experience for 224 frames...
315
+ [2024-08-26 14:51:21,070][98597] Decorrelating experience for 160 frames...
316
+ [2024-08-26 14:51:21,085][99128] Decorrelating experience for 224 frames...
317
+ [2024-08-26 14:51:21,100][99263] Decorrelating experience for 160 frames...
318
+ [2024-08-26 14:51:21,135][99322] Decorrelating experience for 160 frames...
319
+ [2024-08-26 14:51:21,139][98577] Decorrelating experience for 288 frames...
320
+ [2024-08-26 14:51:21,142][98578] Decorrelating experience for 256 frames...
321
+ [2024-08-26 14:51:21,142][98581] Decorrelating experience for 320 frames...
322
+ [2024-08-26 14:51:21,142][98589] Decorrelating experience for 128 frames...
323
+ [2024-08-26 14:51:21,231][98580] Decorrelating experience for 256 frames...
324
+ [2024-08-26 14:51:21,239][98585] Decorrelating experience for 128 frames...
325
+ [2024-08-26 14:51:21,252][99265] Decorrelating experience for 96 frames...
326
+ [2024-08-26 14:51:21,283][98579] Decorrelating experience for 256 frames...
327
+ [2024-08-26 14:51:21,300][98588] Decorrelating experience for 192 frames...
328
+ [2024-08-26 14:51:21,327][98587] Decorrelating experience for 256 frames...
329
+ [2024-08-26 14:51:21,331][99128] Decorrelating experience for 256 frames...
330
+ [2024-08-26 14:51:21,389][99160] Decorrelating experience for 192 frames...
331
+ [2024-08-26 14:51:21,396][99254] Decorrelating experience for 128 frames...
332
+ [2024-08-26 14:51:21,397][99322] Decorrelating experience for 192 frames...
333
+ [2024-08-26 14:51:21,398][98597] Decorrelating experience for 192 frames...
334
+ [2024-08-26 14:51:21,438][98589] Decorrelating experience for 160 frames...
335
+ [2024-08-26 14:51:21,444][98585] Decorrelating experience for 160 frames...
336
+ [2024-08-26 14:51:21,462][98578] Decorrelating experience for 288 frames...
337
+ [2024-08-26 14:51:21,467][98583] Decorrelating experience for 192 frames...
338
+ [2024-08-26 14:51:21,522][98579] Decorrelating experience for 288 frames...
339
+ [2024-08-26 14:51:21,525][99265] Decorrelating experience for 128 frames...
340
+ [2024-08-26 14:51:21,531][98581] Decorrelating experience for 352 frames...
341
+ [2024-08-26 14:51:21,579][98588] Decorrelating experience for 224 frames...
342
+ [2024-08-26 14:51:21,601][98587] Decorrelating experience for 288 frames...
343
+ [2024-08-26 14:51:21,613][99289] Decorrelating experience for 192 frames...
344
+ [2024-08-26 14:51:21,616][99263] Decorrelating experience for 192 frames...
345
+ [2024-08-26 14:51:21,659][99353] Decorrelating experience for 256 frames...
346
+ [2024-08-26 14:51:21,667][98586] Decorrelating experience for 160 frames...
347
+ [2024-08-26 14:51:21,696][98577] Decorrelating experience for 320 frames...
348
+ [2024-08-26 14:51:21,715][98585] Decorrelating experience for 192 frames...
349
+ [2024-08-26 14:51:21,758][98589] Decorrelating experience for 192 frames...
350
+ [2024-08-26 14:51:21,770][98582] Decorrelating experience for 96 frames...
351
+ [2024-08-26 14:51:21,771][99322] Decorrelating experience for 224 frames...
352
+ [2024-08-26 14:51:21,823][99254] Decorrelating experience for 160 frames...
353
+ [2024-08-26 14:51:21,838][98588] Decorrelating experience for 256 frames...
354
+ [2024-08-26 14:51:21,850][99263] Decorrelating experience for 224 frames...
355
+ [2024-08-26 14:51:21,858][98578] Decorrelating experience for 320 frames...
356
+ [2024-08-26 14:51:21,934][99265] Decorrelating experience for 160 frames...
357
+ [2024-08-26 14:51:21,950][98597] Decorrelating experience for 224 frames...
358
+ [2024-08-26 14:51:21,961][99354] Decorrelating experience for 128 frames...
359
+ [2024-08-26 14:51:21,963][98585] Decorrelating experience for 224 frames...
360
+ [2024-08-26 14:51:21,981][98582] Decorrelating experience for 128 frames...
361
+ [2024-08-26 14:51:21,991][98587] Decorrelating experience for 320 frames...
362
+ [2024-08-26 14:51:22,003][98577] Decorrelating experience for 352 frames...
363
+ [2024-08-26 14:51:22,005][98580] Decorrelating experience for 288 frames...
364
+ [2024-08-26 14:51:22,084][99160] Decorrelating experience for 224 frames...
365
+ [2024-08-26 14:51:22,091][99254] Decorrelating experience for 192 frames...
366
+ [2024-08-26 14:51:22,094][98588] Decorrelating experience for 288 frames...
367
+ [2024-08-26 14:51:22,109][99128] Decorrelating experience for 288 frames...
368
+ [2024-08-26 14:51:22,152][98589] Decorrelating experience for 224 frames...
369
+ [2024-08-26 14:51:22,175][98578] Decorrelating experience for 352 frames...
370
+ [2024-08-26 14:51:22,192][98582] Decorrelating experience for 160 frames...
371
+ [2024-08-26 14:51:22,211][98597] Decorrelating experience for 256 frames...
372
+ [2024-08-26 14:51:22,242][99322] Decorrelating experience for 256 frames...
373
+ [2024-08-26 14:51:22,256][99289] Decorrelating experience for 224 frames...
374
+ [2024-08-26 14:51:22,258][99263] Decorrelating experience for 256 frames...
375
+ [2024-08-26 14:51:22,259][98583] Decorrelating experience for 224 frames...
376
+ [2024-08-26 14:51:22,302][98580] Decorrelating experience for 320 frames...
377
+ [2024-08-26 14:51:22,319][99265] Decorrelating experience for 192 frames...
378
+ [2024-08-26 14:51:22,320][98586] Decorrelating experience for 192 frames...
379
+ [2024-08-26 14:51:22,332][99254] Decorrelating experience for 224 frames...
380
+ [2024-08-26 14:51:22,394][98588] Decorrelating experience for 320 frames...
381
+ [2024-08-26 14:51:22,407][98579] Decorrelating experience for 320 frames...
382
+ [2024-08-26 14:51:22,452][98587] Decorrelating experience for 352 frames...
383
+ [2024-08-26 14:51:22,466][98582] Decorrelating experience for 192 frames...
384
+ [2024-08-26 14:51:22,509][99322] Decorrelating experience for 288 frames...
385
+ [2024-08-26 14:51:22,518][99289] Decorrelating experience for 256 frames...
386
+ [2024-08-26 14:51:22,542][99160] Decorrelating experience for 256 frames...
387
+ [2024-08-26 14:51:22,542][98586] Decorrelating experience for 224 frames...
388
+ [2024-08-26 14:51:22,556][99265] Decorrelating experience for 224 frames...
389
+ [2024-08-26 14:51:22,567][98583] Decorrelating experience for 256 frames...
390
+ [2024-08-26 14:51:22,608][99128] Decorrelating experience for 320 frames...
391
+ [2024-08-26 14:51:22,683][98588] Decorrelating experience for 352 frames...
392
+ [2024-08-26 14:51:22,695][99254] Decorrelating experience for 256 frames...
393
+ [2024-08-26 14:51:22,709][98580] Decorrelating experience for 352 frames...
394
+ [2024-08-26 14:51:22,714][98579] Decorrelating experience for 352 frames...
395
+ [2024-08-26 14:51:22,723][99354] Decorrelating experience for 160 frames...
396
+ [2024-08-26 14:51:22,775][98589] Decorrelating experience for 256 frames...
397
+ [2024-08-26 14:51:22,802][98586] Decorrelating experience for 256 frames...
398
+ [2024-08-26 14:51:22,817][99265] Decorrelating experience for 256 frames...
399
+ [2024-08-26 14:51:22,851][99289] Decorrelating experience for 288 frames...
400
+ [2024-08-26 14:51:22,852][98582] Decorrelating experience for 224 frames...
401
+ [2024-08-26 14:51:22,853][98583] Decorrelating experience for 288 frames...
402
+ [2024-08-26 14:51:22,890][98605] Decorrelating experience for 192 frames...
403
+ [2024-08-26 14:51:22,940][99263] Decorrelating experience for 288 frames...
404
+ [2024-08-26 14:51:22,971][99160] Decorrelating experience for 288 frames...
405
+ [2024-08-26 14:51:22,979][99128] Decorrelating experience for 352 frames...
406
+ [2024-08-26 14:51:23,014][99354] Decorrelating experience for 192 frames...
407
+ [2024-08-26 14:51:23,024][98584] Decorrelating experience for 96 frames...
408
+ [2024-08-26 14:51:23,025][98585] Decorrelating experience for 256 frames...
409
+ [2024-08-26 14:51:23,070][99254] Decorrelating experience for 288 frames...
410
+ [2024-08-26 14:51:23,073][98586] Decorrelating experience for 288 frames...
411
+ [2024-08-26 14:51:23,084][98398] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
412
+ [2024-08-26 14:51:23,085][98398] Avg episode reward: [(0, '0.993')]
413
+ [2024-08-26 14:51:23,146][99289] Decorrelating experience for 320 frames...
414
+ [2024-08-26 14:51:23,163][98582] Decorrelating experience for 256 frames...
415
+ [2024-08-26 14:51:23,163][98589] Decorrelating experience for 288 frames...
416
+ [2024-08-26 14:51:23,191][98583] Decorrelating experience for 320 frames...
417
+ [2024-08-26 14:51:23,241][99263] Decorrelating experience for 320 frames...
418
+ [2024-08-26 14:51:23,246][98605] Decorrelating experience for 224 frames...
419
+ [2024-08-26 14:51:23,264][99265] Decorrelating experience for 288 frames...
420
+ [2024-08-26 14:51:23,266][99354] Decorrelating experience for 224 frames...
421
+ [2024-08-26 14:51:23,318][98585] Decorrelating experience for 288 frames...
422
+ [2024-08-26 14:51:23,336][98584] Decorrelating experience for 128 frames...
423
+ [2024-08-26 14:51:23,359][98522] Signal inference workers to stop experience collection...
424
+ [2024-08-26 14:51:23,365][99353] Decorrelating experience for 288 frames...
425
+ [2024-08-26 14:51:23,373][98576] InferenceWorker_p0-w0: stopping experience collection
426
+ [2024-08-26 14:51:23,387][98586] Decorrelating experience for 320 frames...
427
+ [2024-08-26 14:51:23,428][99160] Decorrelating experience for 320 frames...
428
+ [2024-08-26 14:51:23,467][99254] Decorrelating experience for 320 frames...
429
+ [2024-08-26 14:51:23,469][99322] Decorrelating experience for 320 frames...
430
+ [2024-08-26 14:51:23,482][98589] Decorrelating experience for 320 frames...
431
+ [2024-08-26 14:51:23,492][98582] Decorrelating experience for 288 frames...
432
+ [2024-08-26 14:51:23,516][98583] Decorrelating experience for 352 frames...
433
+ [2024-08-26 14:51:23,524][98605] Decorrelating experience for 256 frames...
434
+ [2024-08-26 14:51:23,546][98597] Decorrelating experience for 288 frames...
435
+ [2024-08-26 14:51:23,565][99263] Decorrelating experience for 352 frames...
436
+ [2024-08-26 14:51:23,585][99354] Decorrelating experience for 256 frames...
437
+ [2024-08-26 14:51:23,623][99265] Decorrelating experience for 320 frames...
438
+ [2024-08-26 14:51:23,651][98584] Decorrelating experience for 160 frames...
439
+ [2024-08-26 14:51:23,666][99353] Decorrelating experience for 320 frames...
440
+ [2024-08-26 14:51:23,677][99289] Decorrelating experience for 352 frames...
441
+ [2024-08-26 14:51:23,712][98586] Decorrelating experience for 352 frames...
442
+ [2024-08-26 14:51:23,729][98585] Decorrelating experience for 320 frames...
443
+ [2024-08-26 14:51:23,751][99160] Decorrelating experience for 352 frames...
444
+ [2024-08-26 14:51:23,787][99254] Decorrelating experience for 352 frames...
445
+ [2024-08-26 14:51:23,802][98582] Decorrelating experience for 320 frames...
446
+ [2024-08-26 14:51:23,844][99322] Decorrelating experience for 352 frames...
447
+ [2024-08-26 14:51:23,856][98597] Decorrelating experience for 320 frames...
448
+ [2024-08-26 14:51:23,874][99354] Decorrelating experience for 288 frames...
449
+ [2024-08-26 14:51:23,922][99265] Decorrelating experience for 352 frames...
450
+ [2024-08-26 14:51:23,929][98584] Decorrelating experience for 192 frames...
451
+ [2024-08-26 14:51:23,993][98589] Decorrelating experience for 352 frames...
452
+ [2024-08-26 14:51:24,025][99353] Decorrelating experience for 352 frames...
453
+ [2024-08-26 14:51:24,034][98585] Decorrelating experience for 352 frames...
454
+ [2024-08-26 14:51:24,064][98522] Signal inference workers to resume experience collection...
455
+ [2024-08-26 14:51:24,064][98576] InferenceWorker_p0-w0: resuming experience collection
456
+ [2024-08-26 14:51:24,090][98582] Decorrelating experience for 352 frames...
457
+ [2024-08-26 14:51:24,135][98597] Decorrelating experience for 352 frames...
458
+ [2024-08-26 14:51:24,142][99354] Decorrelating experience for 320 frames...
459
+ [2024-08-26 14:51:24,147][98584] Decorrelating experience for 224 frames...
460
+ [2024-08-26 14:51:24,152][98605] Decorrelating experience for 288 frames...
461
+ [2024-08-26 14:51:24,362][98584] Decorrelating experience for 256 frames...
462
+ [2024-08-26 14:51:24,387][99354] Decorrelating experience for 352 frames...
463
+ [2024-08-26 14:51:24,388][98605] Decorrelating experience for 320 frames...
464
+ [2024-08-26 14:51:24,604][98584] Decorrelating experience for 288 frames...
465
+ [2024-08-26 14:51:24,668][98605] Decorrelating experience for 352 frames...
466
+ [2024-08-26 14:51:24,880][98584] Decorrelating experience for 320 frames...
467
+ [2024-08-26 14:51:25,148][98584] Decorrelating experience for 352 frames...
468
+ [2024-08-26 14:51:25,286][98576] Updated weights for policy 0, policy_version 12 (0.0011)
469
+ [2024-08-26 14:51:26,204][98576] Updated weights for policy 0, policy_version 25 (0.0009)
470
+ [2024-08-26 14:51:26,688][98576] Updated weights for policy 0, policy_version 35 (0.0011)
471
+ [2024-08-26 14:51:27,177][98576] Updated weights for policy 0, policy_version 45 (0.0011)
472
+ [2024-08-26 14:51:27,611][98576] Updated weights for policy 0, policy_version 55 (0.0009)
473
+ [2024-08-26 14:51:28,079][98576] Updated weights for policy 0, policy_version 65 (0.0010)
474
+ [2024-08-26 14:51:28,084][98398] Fps is (10 sec: 27074.1, 60 sec: 27074.1, 300 sec: 27074.1). Total num frames: 266240. Throughput: 0: 4385.1. Samples: 43122. Policy #0 lag: (min: 0.0, avg: 3.0, max: 9.0)
475
+ [2024-08-26 14:51:28,084][98398] Avg episode reward: [(0, '4.311')]
476
+ [2024-08-26 14:51:28,094][98522] Saving new best policy, reward=4.311!
477
+ [2024-08-26 14:51:28,413][98522] Signal inference workers to stop experience collection... (50 times)
478
+ [2024-08-26 14:51:28,423][98522] Signal inference workers to resume experience collection... (50 times)
479
+ [2024-08-26 14:51:28,423][98576] InferenceWorker_p0-w0: stopping experience collection (50 times)
480
+ [2024-08-26 14:51:28,430][98576] InferenceWorker_p0-w0: resuming experience collection (50 times)
481
+ [2024-08-26 14:51:28,505][98576] Updated weights for policy 0, policy_version 75 (0.0013)
482
+ [2024-08-26 14:51:29,048][98576] Updated weights for policy 0, policy_version 85 (0.0010)
483
+ [2024-08-26 14:51:29,632][98576] Updated weights for policy 0, policy_version 96 (0.0009)
484
+ [2024-08-26 14:51:30,146][98576] Updated weights for policy 0, policy_version 107 (0.0011)
485
+ [2024-08-26 14:51:30,625][98576] Updated weights for policy 0, policy_version 117 (0.0009)
486
+ [2024-08-26 14:51:31,066][98576] Updated weights for policy 0, policy_version 127 (0.0011)
487
+ [2024-08-26 14:51:31,502][98576] Updated weights for policy 0, policy_version 137 (0.0008)
488
+ [2024-08-26 14:51:31,748][98522] Signal inference workers to stop experience collection... (100 times)
489
+ [2024-08-26 14:51:31,748][98522] Signal inference workers to resume experience collection... (100 times)
490
+ [2024-08-26 14:51:31,763][98576] InferenceWorker_p0-w0: stopping experience collection (100 times)
491
+ [2024-08-26 14:51:31,763][98576] InferenceWorker_p0-w0: resuming experience collection (100 times)
492
+ [2024-08-26 14:51:31,952][98576] Updated weights for policy 0, policy_version 147 (0.0010)
493
+ [2024-08-26 14:51:32,469][98576] Updated weights for policy 0, policy_version 157 (0.0010)
494
+ [2024-08-26 14:51:33,006][98576] Updated weights for policy 0, policy_version 167 (0.0011)
495
+ [2024-08-26 14:51:33,084][98398] Fps is (10 sec: 69223.1, 60 sec: 46665.3, 300 sec: 46665.3). Total num frames: 692224. Throughput: 0: 11620.7. Samples: 172380. Policy #0 lag: (min: 0.0, avg: 3.9, max: 10.0)
496
+ [2024-08-26 14:51:33,084][98398] Avg episode reward: [(0, '4.302')]
497
+ [2024-08-26 14:51:33,460][98576] Updated weights for policy 0, policy_version 177 (0.0016)
498
+ [2024-08-26 14:51:33,940][98576] Updated weights for policy 0, policy_version 187 (0.0011)
499
+ [2024-08-26 14:51:34,373][98576] Updated weights for policy 0, policy_version 197 (0.0010)
500
+ [2024-08-26 14:51:34,809][98398] Heartbeat connected on Batcher_0
501
+ [2024-08-26 14:51:34,821][98398] Heartbeat connected on RolloutWorker_w4
502
+ [2024-08-26 14:51:34,822][98398] Heartbeat connected on RolloutWorker_w0
503
+ [2024-08-26 14:51:34,822][98398] Heartbeat connected on RolloutWorker_w2
504
+ [2024-08-26 14:51:34,822][98398] Heartbeat connected on RolloutWorker_w5
505
+ [2024-08-26 14:51:34,822][98398] Heartbeat connected on RolloutWorker_w1
506
+ [2024-08-26 14:51:34,822][98398] Heartbeat connected on RolloutWorker_w3
507
+ [2024-08-26 14:51:34,823][98398] Heartbeat connected on RolloutWorker_w6
508
+ [2024-08-26 14:51:34,824][98398] Heartbeat connected on RolloutWorker_w7
509
+ [2024-08-26 14:51:34,825][98398] Heartbeat connected on InferenceWorker_p0-w0
510
+ [2024-08-26 14:51:34,830][98398] Heartbeat connected on RolloutWorker_w9
511
+ [2024-08-26 14:51:34,830][98398] Heartbeat connected on RolloutWorker_w8
512
+ [2024-08-26 14:51:34,831][98398] Heartbeat connected on RolloutWorker_w10
513
+ [2024-08-26 14:51:34,831][98398] Heartbeat connected on RolloutWorker_w11
514
+ [2024-08-26 14:51:34,831][98398] Heartbeat connected on RolloutWorker_w12
515
+ [2024-08-26 14:51:34,832][98522] Signal inference workers to stop experience collection... (150 times)
516
+ [2024-08-26 14:51:34,834][98398] Heartbeat connected on RolloutWorker_w13
517
+ [2024-08-26 14:51:34,839][98398] Heartbeat connected on RolloutWorker_w15
518
+ [2024-08-26 14:51:34,839][98398] Heartbeat connected on RolloutWorker_w17
519
+ [2024-08-26 14:51:34,839][98398] Heartbeat connected on RolloutWorker_w16
520
+ [2024-08-26 14:51:34,839][98398] Heartbeat connected on RolloutWorker_w14
521
+ [2024-08-26 14:51:34,839][98398] Heartbeat connected on RolloutWorker_w18
522
+ [2024-08-26 14:51:34,840][98398] Heartbeat connected on RolloutWorker_w19
523
+ [2024-08-26 14:51:34,841][98398] Heartbeat connected on RolloutWorker_w20
524
+ [2024-08-26 14:51:34,842][98398] Heartbeat connected on RolloutWorker_w21
525
+ [2024-08-26 14:51:34,844][98398] Heartbeat connected on RolloutWorker_w22
526
+ [2024-08-26 14:51:34,845][98398] Heartbeat connected on RolloutWorker_w23
527
+ [2024-08-26 14:51:34,845][98398] Heartbeat connected on LearnerWorker_p0
528
+ [2024-08-26 14:51:34,845][98522] Signal inference workers to resume experience collection... (150 times)
529
+ [2024-08-26 14:51:34,847][98576] InferenceWorker_p0-w0: stopping experience collection (150 times)
530
+ [2024-08-26 14:51:34,849][98576] Updated weights for policy 0, policy_version 207 (0.0010)
531
+ [2024-08-26 14:51:34,856][98576] InferenceWorker_p0-w0: resuming experience collection (150 times)
532
+ [2024-08-26 14:51:35,268][98576] Updated weights for policy 0, policy_version 217 (0.0008)
533
+ [2024-08-26 14:51:35,753][98576] Updated weights for policy 0, policy_version 228 (0.0010)
534
+ [2024-08-26 14:51:36,307][98576] Updated weights for policy 0, policy_version 238 (0.0011)
535
+ [2024-08-26 14:51:36,826][98576] Updated weights for policy 0, policy_version 248 (0.0010)
536
+ [2024-08-26 14:51:37,293][98576] Updated weights for policy 0, policy_version 258 (0.0008)
537
+ [2024-08-26 14:51:37,744][98576] Updated weights for policy 0, policy_version 268 (0.0011)
538
+ [2024-08-26 14:51:38,084][98398] Fps is (10 sec: 85604.5, 60 sec: 56584.9, 300 sec: 56584.9). Total num frames: 1122304. Throughput: 0: 11944.7. Samples: 236910. Policy #0 lag: (min: 1.0, avg: 4.7, max: 11.0)
539
+ [2024-08-26 14:51:38,085][98398] Avg episode reward: [(0, '4.527')]
540
+ [2024-08-26 14:51:38,097][98522] Saving new best policy, reward=4.527!
541
+ [2024-08-26 14:51:38,220][98576] Updated weights for policy 0, policy_version 278 (0.0015)
542
+ [2024-08-26 14:51:38,286][98522] Signal inference workers to stop experience collection... (200 times)
543
+ [2024-08-26 14:51:38,292][98576] InferenceWorker_p0-w0: stopping experience collection (200 times)
544
+ [2024-08-26 14:51:38,302][98522] Signal inference workers to resume experience collection... (200 times)
545
+ [2024-08-26 14:51:38,302][98576] InferenceWorker_p0-w0: resuming experience collection (200 times)
546
+ [2024-08-26 14:51:38,648][98576] Updated weights for policy 0, policy_version 288 (0.0009)
547
+ [2024-08-26 14:51:39,110][98576] Updated weights for policy 0, policy_version 298 (0.0011)
548
+ [2024-08-26 14:51:39,547][98576] Updated weights for policy 0, policy_version 308 (0.0009)
549
+ [2024-08-26 14:51:40,000][98576] Updated weights for policy 0, policy_version 318 (0.0010)
550
+ [2024-08-26 14:51:40,459][98576] Updated weights for policy 0, policy_version 328 (0.0011)
551
+ [2024-08-26 14:51:41,041][98576] Updated weights for policy 0, policy_version 338 (0.0010)
552
+ [2024-08-26 14:51:41,529][98576] Updated weights for policy 0, policy_version 349 (0.0009)
553
+ [2024-08-26 14:51:41,600][98522] Signal inference workers to stop experience collection... (250 times)
554
+ [2024-08-26 14:51:41,608][98576] InferenceWorker_p0-w0: stopping experience collection (250 times)
555
+ [2024-08-26 14:51:41,612][98522] Signal inference workers to resume experience collection... (250 times)
556
+ [2024-08-26 14:51:41,615][98576] InferenceWorker_p0-w0: resuming experience collection (250 times)
557
+ [2024-08-26 14:51:41,990][98576] Updated weights for policy 0, policy_version 359 (0.0011)
558
+ [2024-08-26 14:51:42,402][98576] Updated weights for policy 0, policy_version 369 (0.0011)
559
+ [2024-08-26 14:51:42,888][98576] Updated weights for policy 0, policy_version 379 (0.0011)
560
+ [2024-08-26 14:51:43,084][98398] Fps is (10 sec: 87243.5, 60 sec: 63005.3, 300 sec: 63005.3). Total num frames: 1564672. Throughput: 0: 14836.9. Samples: 368460. Policy #0 lag: (min: 0.0, avg: 5.8, max: 10.0)
561
+ [2024-08-26 14:51:43,085][98398] Avg episode reward: [(0, '4.298')]
562
+ [2024-08-26 14:51:43,340][98576] Updated weights for policy 0, policy_version 389 (0.0010)
563
+ [2024-08-26 14:51:43,771][98576] Updated weights for policy 0, policy_version 399 (0.0014)
564
+ [2024-08-26 14:51:44,310][98576] Updated weights for policy 0, policy_version 409 (0.0010)
565
+ [2024-08-26 14:51:44,876][98576] Updated weights for policy 0, policy_version 419 (0.0009)
566
+ [2024-08-26 14:51:45,298][98576] Updated weights for policy 0, policy_version 429 (0.0008)
567
+ [2024-08-26 14:51:45,357][98522] Signal inference workers to stop experience collection... (300 times)
568
+ [2024-08-26 14:51:45,364][98522] Signal inference workers to resume experience collection... (300 times)
569
+ [2024-08-26 14:51:45,371][98576] InferenceWorker_p0-w0: stopping experience collection (300 times)
570
+ [2024-08-26 14:51:45,371][98576] InferenceWorker_p0-w0: resuming experience collection (300 times)
571
+ [2024-08-26 14:51:45,756][98576] Updated weights for policy 0, policy_version 439 (0.0010)
572
+ [2024-08-26 14:51:46,247][98576] Updated weights for policy 0, policy_version 449 (0.0009)
573
+ [2024-08-26 14:51:46,696][98576] Updated weights for policy 0, policy_version 459 (0.0013)
574
+ [2024-08-26 14:51:47,167][98576] Updated weights for policy 0, policy_version 469 (0.0011)
575
+ [2024-08-26 14:51:47,613][98576] Updated weights for policy 0, policy_version 479 (0.0013)
576
+ [2024-08-26 14:51:48,084][98398] Fps is (10 sec: 87654.9, 60 sec: 66999.2, 300 sec: 66999.2). Total num frames: 1998848. Throughput: 0: 16698.4. Samples: 498180. Policy #0 lag: (min: 2.0, avg: 6.4, max: 12.0)
577
+ [2024-08-26 14:51:48,085][98398] Avg episode reward: [(0, '4.265')]
578
+ [2024-08-26 14:51:48,094][98576] Updated weights for policy 0, policy_version 489 (0.0010)
579
+ [2024-08-26 14:51:48,541][98576] Updated weights for policy 0, policy_version 499 (0.0011)
580
+ [2024-08-26 14:51:48,881][98522] Signal inference workers to stop experience collection... (350 times)
581
+ [2024-08-26 14:51:48,888][98576] InferenceWorker_p0-w0: stopping experience collection (350 times)
582
+ [2024-08-26 14:51:48,891][98522] Signal inference workers to resume experience collection... (350 times)
583
+ [2024-08-26 14:51:48,894][98576] InferenceWorker_p0-w0: resuming experience collection (350 times)
584
+ [2024-08-26 14:51:49,083][98576] Updated weights for policy 0, policy_version 509 (0.0012)
585
+ [2024-08-26 14:51:49,551][98576] Updated weights for policy 0, policy_version 519 (0.0010)
586
+ [2024-08-26 14:51:50,018][98576] Updated weights for policy 0, policy_version 529 (0.0010)
587
+ [2024-08-26 14:51:50,479][98576] Updated weights for policy 0, policy_version 539 (0.0010)
588
+ [2024-08-26 14:51:50,934][98576] Updated weights for policy 0, policy_version 549 (0.0012)
589
+ [2024-08-26 14:51:51,368][98576] Updated weights for policy 0, policy_version 559 (0.0009)
590
+ [2024-08-26 14:51:51,809][98576] Updated weights for policy 0, policy_version 569 (0.0010)
591
+ [2024-08-26 14:51:52,260][98576] Updated weights for policy 0, policy_version 579 (0.0009)
592
+ [2024-08-26 14:51:52,707][98576] Updated weights for policy 0, policy_version 589 (0.0012)
593
+ [2024-08-26 14:51:53,084][98398] Fps is (10 sec: 87245.6, 60 sec: 69964.1, 300 sec: 69964.1). Total num frames: 2437120. Throughput: 0: 16196.8. Samples: 564198. Policy #0 lag: (min: 1.0, avg: 5.2, max: 11.0)
594
+ [2024-08-26 14:51:53,085][98398] Avg episode reward: [(0, '4.609')]
595
+ [2024-08-26 14:51:53,107][98522] Signal inference workers to stop experience collection... (400 times)
596
+ [2024-08-26 14:51:53,115][98576] InferenceWorker_p0-w0: stopping experience collection (400 times)
597
+ [2024-08-26 14:51:53,128][98522] Signal inference workers to resume experience collection... (400 times)
598
+ [2024-08-26 14:51:53,128][98522] Saving new best policy, reward=4.609!
599
+ [2024-08-26 14:51:53,128][98576] InferenceWorker_p0-w0: resuming experience collection (400 times)
600
+ [2024-08-26 14:51:53,314][98576] Updated weights for policy 0, policy_version 599 (0.0012)
601
+ [2024-08-26 14:51:53,757][98576] Updated weights for policy 0, policy_version 609 (0.0010)
602
+ [2024-08-26 14:51:54,279][98576] Updated weights for policy 0, policy_version 619 (0.0010)
603
+ [2024-08-26 14:51:54,731][98576] Updated weights for policy 0, policy_version 629 (0.0011)
604
+ [2024-08-26 14:51:55,178][98576] Updated weights for policy 0, policy_version 639 (0.0009)
605
+ [2024-08-26 14:51:55,633][98576] Updated weights for policy 0, policy_version 649 (0.0009)
606
+ [2024-08-26 14:51:56,087][98576] Updated weights for policy 0, policy_version 659 (0.0011)
607
+ [2024-08-26 14:51:56,290][98522] Signal inference workers to stop experience collection... (450 times)
608
+ [2024-08-26 14:51:56,290][98522] Signal inference workers to resume experience collection... (450 times)
609
+ [2024-08-26 14:51:56,300][98576] InferenceWorker_p0-w0: stopping experience collection (450 times)
610
+ [2024-08-26 14:51:56,300][98576] InferenceWorker_p0-w0: resuming experience collection (450 times)
611
+ [2024-08-26 14:51:56,541][98576] Updated weights for policy 0, policy_version 669 (0.0012)
612
+ [2024-08-26 14:51:57,000][98576] Updated weights for policy 0, policy_version 679 (0.0010)
613
+ [2024-08-26 14:51:57,537][98576] Updated weights for policy 0, policy_version 689 (0.0008)
614
+ [2024-08-26 14:51:58,014][98576] Updated weights for policy 0, policy_version 699 (0.0016)
615
+ [2024-08-26 14:51:58,085][98398] Fps is (10 sec: 86423.5, 60 sec: 71875.6, 300 sec: 71875.6). Total num frames: 2863104. Throughput: 0: 17430.4. Samples: 694326. Policy #0 lag: (min: 0.0, avg: 5.3, max: 11.0)
616
+ [2024-08-26 14:51:58,085][98398] Avg episode reward: [(0, '4.465')]
617
+ [2024-08-26 14:51:58,481][98576] Updated weights for policy 0, policy_version 709 (0.0010)
618
+ [2024-08-26 14:51:58,990][98576] Updated weights for policy 0, policy_version 719 (0.0011)
619
+ [2024-08-26 14:51:59,426][98576] Updated weights for policy 0, policy_version 729 (0.0009)
620
+ [2024-08-26 14:51:59,475][98522] Signal inference workers to stop experience collection... (500 times)
621
+ [2024-08-26 14:51:59,475][98522] Signal inference workers to resume experience collection... (500 times)
622
+ [2024-08-26 14:51:59,486][98576] InferenceWorker_p0-w0: stopping experience collection (500 times)
623
+ [2024-08-26 14:51:59,486][98576] InferenceWorker_p0-w0: resuming experience collection (500 times)
624
+ [2024-08-26 14:51:59,891][98576] Updated weights for policy 0, policy_version 739 (0.0010)
625
+ [2024-08-26 14:52:00,333][98576] Updated weights for policy 0, policy_version 749 (0.0010)
626
+ [2024-08-26 14:52:00,749][98576] Updated weights for policy 0, policy_version 759 (0.0011)
627
+ [2024-08-26 14:52:01,224][98576] Updated weights for policy 0, policy_version 769 (0.0010)
628
+ [2024-08-26 14:52:01,745][98576] Updated weights for policy 0, policy_version 779 (0.0010)
629
+ [2024-08-26 14:52:02,203][98576] Updated weights for policy 0, policy_version 789 (0.0011)
630
+ [2024-08-26 14:52:02,650][98576] Updated weights for policy 0, policy_version 799 (0.0008)
631
+ [2024-08-26 14:52:03,084][98398] Fps is (10 sec: 86835.0, 60 sec: 73727.1, 300 sec: 73727.1). Total num frames: 3305472. Throughput: 0: 18420.9. Samples: 825882. Policy #0 lag: (min: 0.0, avg: 4.8, max: 10.0)
632
+ [2024-08-26 14:52:03,085][98398] Avg episode reward: [(0, '4.489')]
633
+ [2024-08-26 14:52:03,159][98576] Updated weights for policy 0, policy_version 809 (0.0011)
634
+ [2024-08-26 14:52:03,588][98576] Updated weights for policy 0, policy_version 819 (0.0008)
635
+ [2024-08-26 14:52:04,051][98576] Updated weights for policy 0, policy_version 829 (0.0009)
636
+ [2024-08-26 14:52:04,484][98522] Signal inference workers to stop experience collection... (550 times)
637
+ [2024-08-26 14:52:04,494][98576] InferenceWorker_p0-w0: stopping experience collection (550 times)
638
+ [2024-08-26 14:52:04,495][98522] Signal inference workers to resume experience collection... (550 times)
639
+ [2024-08-26 14:52:04,496][98576] Updated weights for policy 0, policy_version 839 (0.0009)
640
+ [2024-08-26 14:52:04,501][98576] InferenceWorker_p0-w0: resuming experience collection (550 times)
641
+ [2024-08-26 14:52:04,955][98576] Updated weights for policy 0, policy_version 849 (0.0011)
642
+ [2024-08-26 14:52:05,446][98576] Updated weights for policy 0, policy_version 859 (0.0008)
643
+ [2024-08-26 14:52:05,957][98576] Updated weights for policy 0, policy_version 869 (0.0010)
644
+ [2024-08-26 14:52:06,419][98576] Updated weights for policy 0, policy_version 879 (0.0010)
645
+ [2024-08-26 14:52:06,904][98576] Updated weights for policy 0, policy_version 889 (0.0011)
646
+ [2024-08-26 14:52:07,371][98576] Updated weights for policy 0, policy_version 899 (0.0009)
647
+ [2024-08-26 14:52:07,808][98576] Updated weights for policy 0, policy_version 909 (0.0009)
648
+ [2024-08-26 14:52:08,084][98398] Fps is (10 sec: 88067.5, 60 sec: 75124.6, 300 sec: 75124.6). Total num frames: 3743744. Throughput: 0: 19803.8. Samples: 891168. Policy #0 lag: (min: 0.0, avg: 4.9, max: 10.0)
649
+ [2024-08-26 14:52:08,084][98398] Avg episode reward: [(0, '4.461')]
650
+ [2024-08-26 14:52:08,290][98576] Updated weights for policy 0, policy_version 919 (0.0010)
651
+ [2024-08-26 14:52:08,721][98576] Updated weights for policy 0, policy_version 929 (0.0008)
652
+ [2024-08-26 14:52:09,177][98576] Updated weights for policy 0, policy_version 939 (0.0012)
653
+ [2024-08-26 14:52:09,661][98576] Updated weights for policy 0, policy_version 949 (0.0009)
654
+ [2024-08-26 14:52:10,127][98576] Updated weights for policy 0, policy_version 959 (0.0011)
655
+ [2024-08-26 14:52:10,300][98522] Signal inference workers to stop experience collection... (600 times)
656
+ [2024-08-26 14:52:10,300][98522] Signal inference workers to resume experience collection... (600 times)
657
+ [2024-08-26 14:52:10,310][98576] InferenceWorker_p0-w0: stopping experience collection (600 times)
658
+ [2024-08-26 14:52:10,311][98576] InferenceWorker_p0-w0: resuming experience collection (600 times)
659
+ [2024-08-26 14:52:10,620][98576] Updated weights for policy 0, policy_version 969 (0.0008)
660
+ [2024-08-26 14:52:11,039][98398] Component Batcher_0 stopped!
661
+ [2024-08-26 14:52:11,039][98522] Stopping Batcher_0...
662
+ [2024-08-26 14:52:11,040][98522] Loop batcher_evt_loop terminating...
663
+ [2024-08-26 14:52:11,040][98522] Saving /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
664
+ [2024-08-26 14:52:11,054][98576] Weights refcount: 2 0
665
+ [2024-08-26 14:52:11,056][98576] Stopping InferenceWorker_p0-w0...
666
+ [2024-08-26 14:52:11,056][98398] Component InferenceWorker_p0-w0 stopped!
667
+ [2024-08-26 14:52:11,056][98576] Loop inference_proc0-0_evt_loop terminating...
668
+ [2024-08-26 14:52:11,086][98522] Saving /home/ai24/condaprojects/droid/d0/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
669
+ [2024-08-26 14:52:11,124][99289] Stopping RolloutWorker_w20...
670
+ [2024-08-26 14:52:11,125][98398] Component RolloutWorker_w20 stopped!
671
+ [2024-08-26 14:52:11,125][99289] Loop rollout_proc20_evt_loop terminating...
672
+ [2024-08-26 14:52:11,127][98579] Stopping RolloutWorker_w2...
673
+ [2024-08-26 14:52:11,127][98398] Component RolloutWorker_w2 stopped!
674
+ [2024-08-26 14:52:11,127][98579] Loop rollout_proc2_evt_loop terminating...
675
+ [2024-08-26 14:52:11,128][98398] Component RolloutWorker_w17 stopped!
676
+ [2024-08-26 14:52:11,128][99254] Stopping RolloutWorker_w17...
677
+ [2024-08-26 14:52:11,128][99254] Loop rollout_proc17_evt_loop terminating...
678
+ [2024-08-26 14:52:11,129][98578] Stopping RolloutWorker_w1...
679
+ [2024-08-26 14:52:11,129][98398] Component RolloutWorker_w1 stopped!
680
+ [2024-08-26 14:52:11,129][98586] Stopping RolloutWorker_w9...
681
+ [2024-08-26 14:52:11,129][98398] Component RolloutWorker_w9 stopped!
682
+ [2024-08-26 14:52:11,129][98578] Loop rollout_proc1_evt_loop terminating...
683
+ [2024-08-26 14:52:11,129][98582] Stopping RolloutWorker_w4...
684
+ [2024-08-26 14:52:11,129][98398] Component RolloutWorker_w4 stopped!
685
+ [2024-08-26 14:52:11,129][98586] Loop rollout_proc9_evt_loop terminating...
686
+ [2024-08-26 14:52:11,129][98584] Stopping RolloutWorker_w6...
687
+ [2024-08-26 14:52:11,129][98582] Loop rollout_proc4_evt_loop terminating...
688
+ [2024-08-26 14:52:11,129][98398] Component RolloutWorker_w6 stopped!
689
+ [2024-08-26 14:52:11,129][98584] Loop rollout_proc6_evt_loop terminating...
690
+ [2024-08-26 14:52:11,130][98398] Component RolloutWorker_w10 stopped!
691
+ [2024-08-26 14:52:11,130][98587] Stopping RolloutWorker_w10...
692
+ [2024-08-26 14:52:11,130][98587] Loop rollout_proc10_evt_loop terminating...
693
+ [2024-08-26 14:52:11,141][99322] Stopping RolloutWorker_w21...
694
+ [2024-08-26 14:52:11,141][98585] Stopping RolloutWorker_w8...
695
+ [2024-08-26 14:52:11,141][99322] Loop rollout_proc21_evt_loop terminating...
696
+ [2024-08-26 14:52:11,141][98398] Component RolloutWorker_w21 stopped!
697
+ [2024-08-26 14:52:11,141][99128] Stopping RolloutWorker_w15...
698
+ [2024-08-26 14:52:11,141][98585] Loop rollout_proc8_evt_loop terminating...
699
+ [2024-08-26 14:52:11,141][98522] Stopping LearnerWorker_p0...
700
+ [2024-08-26 14:52:11,141][98581] Stopping RolloutWorker_w5...
701
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w8 stopped!
702
+ [2024-08-26 14:52:11,141][98580] Stopping RolloutWorker_w3...
703
+ [2024-08-26 14:52:11,142][99128] Loop rollout_proc15_evt_loop terminating...
704
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w15 stopped!
705
+ [2024-08-26 14:52:11,142][98522] Loop learner_proc0_evt_loop terminating...
706
+ [2024-08-26 14:52:11,142][98589] Stopping RolloutWorker_w12...
707
+ [2024-08-26 14:52:11,142][98581] Loop rollout_proc5_evt_loop terminating...
708
+ [2024-08-26 14:52:11,142][98577] Stopping RolloutWorker_w0...
709
+ [2024-08-26 14:52:11,142][98398] Component LearnerWorker_p0 stopped!
710
+ [2024-08-26 14:52:11,142][99160] Stopping RolloutWorker_w16...
711
+ [2024-08-26 14:52:11,142][98605] Stopping RolloutWorker_w14...
712
+ [2024-08-26 14:52:11,142][98588] Stopping RolloutWorker_w11...
713
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w5 stopped!
714
+ [2024-08-26 14:52:11,142][98580] Loop rollout_proc3_evt_loop terminating...
715
+ [2024-08-26 14:52:11,142][98589] Loop rollout_proc12_evt_loop terminating...
716
+ [2024-08-26 14:52:11,142][98577] Loop rollout_proc0_evt_loop terminating...
717
+ [2024-08-26 14:52:11,142][99160] Loop rollout_proc16_evt_loop terminating...
718
+ [2024-08-26 14:52:11,142][98605] Loop rollout_proc14_evt_loop terminating...
719
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w3 stopped!
720
+ [2024-08-26 14:52:11,142][98588] Loop rollout_proc11_evt_loop terminating...
721
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w12 stopped!
722
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w0 stopped!
723
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w16 stopped!
724
+ [2024-08-26 14:52:11,142][99354] Stopping RolloutWorker_w23...
725
+ [2024-08-26 14:52:11,142][98398] Component RolloutWorker_w14 stopped!
726
+ [2024-08-26 14:52:11,143][99354] Loop rollout_proc23_evt_loop terminating...
727
+ [2024-08-26 14:52:11,143][98398] Component RolloutWorker_w11 stopped!
728
+ [2024-08-26 14:52:11,143][99263] Stopping RolloutWorker_w19...
729
+ [2024-08-26 14:52:11,143][98398] Component RolloutWorker_w23 stopped!
730
+ [2024-08-26 14:52:11,143][98398] Component RolloutWorker_w19 stopped!
731
+ [2024-08-26 14:52:11,143][98597] Stopping RolloutWorker_w13...
732
+ [2024-08-26 14:52:11,143][99263] Loop rollout_proc19_evt_loop terminating...
733
+ [2024-08-26 14:52:11,143][98398] Component RolloutWorker_w13 stopped!
734
+ [2024-08-26 14:52:11,143][98597] Loop rollout_proc13_evt_loop terminating...
735
+ [2024-08-26 14:52:11,143][98398] Component RolloutWorker_w22 stopped!
736
+ [2024-08-26 14:52:11,143][99353] Stopping RolloutWorker_w22...
737
+ [2024-08-26 14:52:11,144][99353] Loop rollout_proc22_evt_loop terminating...
738
+ [2024-08-26 14:52:11,144][98398] Component RolloutWorker_w7 stopped!
739
+ [2024-08-26 14:52:11,144][98583] Stopping RolloutWorker_w7...
740
+ [2024-08-26 14:52:11,144][98583] Loop rollout_proc7_evt_loop terminating...
741
+ [2024-08-26 14:52:11,147][98398] Component RolloutWorker_w18 stopped!
742
+ [2024-08-26 14:52:11,147][99265] Stopping RolloutWorker_w18...
743
+ [2024-08-26 14:52:11,147][98398] Waiting for process learner_proc0 to stop...
744
+ [2024-08-26 14:52:11,147][99265] Loop rollout_proc18_evt_loop terminating...
745
+ [2024-08-26 14:52:11,958][98398] Waiting for process inference_proc0-0 to join...
746
+ [2024-08-26 14:52:11,958][98398] Waiting for process rollout_proc0 to join...
747
+ [2024-08-26 14:52:11,958][98398] Waiting for process rollout_proc1 to join...
748
+ [2024-08-26 14:52:11,958][98398] Waiting for process rollout_proc2 to join...
749
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc3 to join...
750
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc4 to join...
751
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc5 to join...
752
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc6 to join...
753
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc7 to join...
754
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc8 to join...
755
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc9 to join...
756
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc10 to join...
757
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc11 to join...
758
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc12 to join...
759
+ [2024-08-26 14:52:11,959][98398] Waiting for process rollout_proc13 to join...
760
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc14 to join...
761
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc15 to join...
762
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc16 to join...
763
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc17 to join...
764
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc18 to join...
765
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc19 to join...
766
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc20 to join...
767
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc21 to join...
768
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc22 to join...
769
+ [2024-08-26 14:52:11,960][98398] Waiting for process rollout_proc23 to join...
770
+ [2024-08-26 14:52:11,961][98398] Batcher 0 profile tree view:
771
+ batching: 8.8489, releasing_batches: 5.4352
772
+ [2024-08-26 14:52:11,961][98398] InferenceWorker_p0-w0 profile tree view:
773
+ wait_policy: 0.0001
774
+ wait_policy_total: 5.1096
775
+ update_model: 1.3600
776
+ weight_update: 0.0010
777
+ one_step: 0.0039
778
+ handle_policy_step: 44.6662
779
+ deserialize: 5.8751, stack: 0.2202, obs_to_device_normalize: 11.8517, forward: 17.3113, send_messages: 3.6109
780
+ prepare_outputs: 4.5991
781
+ to_cpu: 2.7106
782
+ [2024-08-26 14:52:11,961][98398] Learner 0 profile tree view:
783
+ misc: 0.0035, prepare_batch: 7.8784
784
+ train: 18.0238
785
+ epoch_init: 0.0032, minibatch_init: 0.0038, losses_postprocess: 0.3948, kl_divergence: 0.4595, after_optimizer: 1.2354
786
+ calculate_losses: 8.3610
787
+ losses_init: 0.0016, forward_head: 0.9243, bptt_initial: 3.4646, tail: 0.6984, advantages_returns: 0.2220, losses: 1.4552
788
+ bptt: 1.4319
789
+ bptt_forward_core: 1.3695
790
+ update: 7.2580
791
+ clip: 0.7661
792
+ [2024-08-26 14:52:11,961][98398] RolloutWorker_w0 profile tree view:
793
+ wait_for_trajectories: 0.0191, enqueue_policy_requests: 1.4591, env_step: 29.5664, overhead: 1.6877, complete_rollouts: 0.0252
794
+ save_policy_outputs: 1.6654
795
+ split_output_tensors: 0.5414
796
+ [2024-08-26 14:52:11,961][98398] RolloutWorker_w23 profile tree view:
797
+ wait_for_trajectories: 0.0188, enqueue_policy_requests: 1.4002, env_step: 29.0792, overhead: 1.6564, complete_rollouts: 0.0229
798
+ save_policy_outputs: 1.6343
799
+ split_output_tensors: 0.5361
800
+ [2024-08-26 14:52:11,961][98398] Loop Runner_EvtLoop terminating...
801
+ [2024-08-26 14:52:11,961][98398] Runner profile tree view:
802
+ main_loop: 57.1156
803
+ [2024-08-26 14:52:11,961][98398] Collected {0: 4005888}, FPS: 70136.5