ItchyB committed on
Commit 10887b4
1 Parent(s): 3a0c27d

Upload folder using huggingface_hub

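The commit message matches the default used by `huggingface_hub` when a local folder is pushed in a single call. Below is a minimal sketch of what such an upload can look like, assuming the local Sample Factory experiment directory referenced throughout `sf_log.txt` (`train_dir/default_experiment`) and the repo id that appears in the log; the exact invocation that produced this commit is not recorded here.

```python
from huggingface_hub import HfApi

# Illustrative sketch only: push a local Sample Factory experiment folder to the Hub.
# Assumes an authenticated session (e.g. via `huggingface-cli login`) and that the
# folder path below matches the experiment directory seen in sf_log.txt.
api = HfApi()
api.upload_folder(
    folder_path="train_dir/default_experiment",  # assumed local path
    repo_id="ItchyB/rl_course_vizdoom_health_gathering_supreme",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```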
.summary/0/events.out.tfevents.1682809515.CAPTAIN-AMERICA ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:359eb8396a006e3b13e3a02a40ea08a7eb499a2a8095355685f5f4a28936eae9
+ size 2612
.summary/0/events.out.tfevents.1682810224.CAPTAIN-AMERICA ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b77fb55b78546d12e0bb5f78c39f4ddfa2ec99cf80e5ec49a22aaca16b5f4ef
+ size 2158
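The two files under `.summary/0/` are TensorBoard event files written during training; only their Git LFS pointers (hash and size) appear in the diff. One way to inspect them after downloading from this repo is sketched below; the `EventAccumulator` usage is illustrative and not part of this commit.

```python
from huggingface_hub import hf_hub_download
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Sketch: fetch one of the event files added in this commit and list its scalar tags.
path = hf_hub_download(
    repo_id="ItchyB/rl_course_vizdoom_health_gathering_supreme",
    filename=".summary/0/events.out.tfevents.1682809515.CAPTAIN-AMERICA",
)
acc = EventAccumulator(path)
acc.Reload()                   # parse the event file
print(acc.Tags()["scalars"])   # names of the logged scalar series
```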
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
  type: doom_health_gathering_supreme
  metrics:
  - type: mean_reward
- value: 3.90 +/- 0.60
+ value: 4.10 +/- 0.49
  name: mean_reward
  verified: false
  ---
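The only model-card change is the reported evaluation metric: `mean_reward` moves from 3.90 +/- 0.60 to 4.10 +/- 0.49, i.e. the mean and standard deviation of per-episode reward over the evaluation run logged in `sf_log.txt` below (10 episodes, `verified: false`). A small illustration of how such a number is formed, using made-up episode returns rather than the actual ones:

```python
import numpy as np

# Hypothetical per-episode returns for a 10-episode evaluation (NOT the real values).
episode_returns = np.array([4.7, 3.8, 4.1, 3.9, 4.6, 3.6, 4.3, 4.0, 4.8, 3.2])
print(f"mean_reward: {episode_returns.mean():.2f} +/- {episode_returns.std():.2f}")
```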
checkpoint_p0/checkpoint_000000980_4014080.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79287a35aa9bcd16dd35958ecb8ffd006a97ba97324a7a148d07dab54b8b40ea
+ size 34929220
checkpoint_p0/checkpoint_000000982_4022272.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a379b3012b3684e426e9c3a17dfef0fc6ac13d78a67db96f496ef202ab5f916
+ size 34929220
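The `checkpoint_p0/*.pth` files are the Sample Factory learner checkpoints saved at the end of the runs logged below (train steps 980 and 982); as with the other binaries, only their LFS pointers are shown here. A sketch of how one might pull and inspect the newer checkpoint, assuming nothing about its internal key names:

```python
import torch
from huggingface_hub import hf_hub_download

# Sketch: fetch the real checkpoint behind the LFS pointer and peek at its contents.
# The key names inside the checkpoint dict are not guaranteed; we only list whatever is there.
path = hf_hub_download(
    repo_id="ItchyB/rl_course_vizdoom_health_gathering_supreme",
    filename="checkpoint_p0/checkpoint_000000982_4022272.pth",
)
checkpoint = torch.load(path, map_location="cpu")
print(sorted(checkpoint.keys()))
```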
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e1acb924a30b27425c5dcc73ae4ba44ec05f28cea902cafff951ef665970113
- size 5397393
+ oid sha256:d032c87a2efab0e06ac7ac1ce54c8ae2332a4fa365b19ca4bd69561d097bdd8b
+ size 6150210
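The replay video was regenerated by the evaluation run at the end of `sf_log.txt`, so its LFS pointer now records a new hash and size (6,150,210 bytes). Since the `oid` is simply the SHA-256 of the file contents, a downloaded copy can be checked against the pointer; a small sketch:

```python
import hashlib
from huggingface_hub import hf_hub_download

# Sketch: verify a downloaded file against the sha256 oid recorded in its LFS pointer.
path = hf_hub_download(
    repo_id="ItchyB/rl_course_vizdoom_health_gathering_supreme",
    filename="replay.mp4",
)
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
print(digest == "d032c87a2efab0e06ac7ac1ce54c8ae2332a4fa365b19ca4bd69561d097bdd8b")
```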
sf_log.txt CHANGED
@@ -622,3 +622,582 @@ main_loop: 217.2086
622
  [2023-04-27 22:36:26,842][19320] Avg episode rewards: #0: 4.304, true rewards: #0: 3.904
623
  [2023-04-27 22:36:26,842][19320] Avg episode reward: 4.304, avg true_objective: 3.904
624
  [2023-04-27 22:36:30,880][19320] Replay video saved to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/replay.mp4!
625
+ [2023-04-27 22:36:34,016][19320] The model has been pushed to https://huggingface.co/ItchyB/rl_course_vizdoom_health_gathering_supreme
626
+ [2023-04-29 19:05:17,493][108205] Saving configuration to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json...
627
+ [2023-04-29 19:05:17,495][108205] Rollout worker 0 uses device cpu
628
+ [2023-04-29 19:05:17,495][108205] Rollout worker 1 uses device cpu
629
+ [2023-04-29 19:05:17,496][108205] Rollout worker 2 uses device cpu
630
+ [2023-04-29 19:05:17,496][108205] Rollout worker 3 uses device cpu
631
+ [2023-04-29 19:05:17,497][108205] Rollout worker 4 uses device cpu
632
+ [2023-04-29 19:05:17,498][108205] Rollout worker 5 uses device cpu
633
+ [2023-04-29 19:05:17,498][108205] Rollout worker 6 uses device cpu
634
+ [2023-04-29 19:05:17,499][108205] Rollout worker 7 uses device cpu
635
+ [2023-04-29 19:05:17,524][108205] Using GPUs [0] for process 0 (actually maps to GPUs [0])
636
+ [2023-04-29 19:05:17,525][108205] InferenceWorker_p0-w0: min num requests: 2
637
+ [2023-04-29 19:05:17,540][108205] Starting all processes...
638
+ [2023-04-29 19:05:17,540][108205] Starting process learner_proc0
639
+ [2023-04-29 19:05:17,634][108205] Starting all processes...
640
+ [2023-04-29 19:05:17,637][108205] Starting process inference_proc0-0
641
+ [2023-04-29 19:05:17,638][108205] Starting process rollout_proc0
642
+ [2023-04-29 19:05:17,638][108205] Starting process rollout_proc1
643
+ [2023-04-29 19:05:17,638][108205] Starting process rollout_proc2
644
+ [2023-04-29 19:05:17,639][108205] Starting process rollout_proc3
645
+ [2023-04-29 19:05:17,639][108205] Starting process rollout_proc4
646
+ [2023-04-29 19:05:17,639][108205] Starting process rollout_proc5
647
+ [2023-04-29 19:05:17,640][108205] Starting process rollout_proc6
648
+ [2023-04-29 19:05:17,640][108205] Starting process rollout_proc7
649
+ [2023-04-29 19:05:18,549][133597] Using GPUs [0] for process 0 (actually maps to GPUs [0])
650
+ [2023-04-29 19:05:18,549][133597] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
651
+ [2023-04-29 19:05:18,577][133617] Worker 6 uses CPU cores [18, 19, 20]
652
+ [2023-04-29 19:05:18,590][133597] Num visible devices: 1
653
+ [2023-04-29 19:05:18,596][133612] Worker 1 uses CPU cores [3, 4, 5]
654
+ [2023-04-29 19:05:18,600][133610] Using GPUs [0] for process 0 (actually maps to GPUs [0])
655
+ [2023-04-29 19:05:18,600][133610] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
656
+ [2023-04-29 19:05:18,614][133615] Worker 4 uses CPU cores [12, 13, 14]
657
+ [2023-04-29 19:05:18,615][133610] Num visible devices: 1
658
+ [2023-04-29 19:05:18,623][133611] Worker 0 uses CPU cores [0, 1, 2]
659
+ [2023-04-29 19:05:18,626][133616] Worker 5 uses CPU cores [15, 16, 17]
660
+ [2023-04-29 19:05:18,626][133613] Worker 3 uses CPU cores [9, 10, 11]
661
+ [2023-04-29 19:05:18,637][133597] Starting seed is not provided
662
+ [2023-04-29 19:05:18,637][133597] Using GPUs [0] for process 0 (actually maps to GPUs [0])
663
+ [2023-04-29 19:05:18,637][133597] Initializing actor-critic model on device cuda:0
664
+ [2023-04-29 19:05:18,638][133597] RunningMeanStd input shape: (3, 72, 128)
665
+ [2023-04-29 19:05:18,638][133597] RunningMeanStd input shape: (1,)
666
+ [2023-04-29 19:05:18,640][133618] Worker 7 uses CPU cores [21, 22, 23]
667
+ [2023-04-29 19:05:18,642][133614] Worker 2 uses CPU cores [6, 7, 8]
668
+ [2023-04-29 19:05:18,648][133597] ConvEncoder: input_channels=3
669
+ [2023-04-29 19:05:18,758][133597] Conv encoder output size: 512
670
+ [2023-04-29 19:05:18,758][133597] Policy head output size: 512
671
+ [2023-04-29 19:05:18,790][133597] Created Actor Critic model with architecture:
672
+ [2023-04-29 19:05:18,790][133597] ActorCriticSharedWeights(
673
+ (obs_normalizer): ObservationNormalizer(
674
+ (running_mean_std): RunningMeanStdDictInPlace(
675
+ (running_mean_std): ModuleDict(
676
+ (obs): RunningMeanStdInPlace()
677
+ )
678
+ )
679
+ )
680
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
681
+ (encoder): VizdoomEncoder(
682
+ (basic_encoder): ConvEncoder(
683
+ (enc): RecursiveScriptModule(
684
+ original_name=ConvEncoderImpl
685
+ (conv_head): RecursiveScriptModule(
686
+ original_name=Sequential
687
+ (0): RecursiveScriptModule(original_name=Conv2d)
688
+ (1): RecursiveScriptModule(original_name=ELU)
689
+ (2): RecursiveScriptModule(original_name=Conv2d)
690
+ (3): RecursiveScriptModule(original_name=ELU)
691
+ (4): RecursiveScriptModule(original_name=Conv2d)
692
+ (5): RecursiveScriptModule(original_name=ELU)
693
+ )
694
+ (mlp_layers): RecursiveScriptModule(
695
+ original_name=Sequential
696
+ (0): RecursiveScriptModule(original_name=Linear)
697
+ (1): RecursiveScriptModule(original_name=ELU)
698
+ )
699
+ )
700
+ )
701
+ )
702
+ (core): ModelCoreRNN(
703
+ (core): GRU(512, 512)
704
+ )
705
+ (decoder): MlpDecoder(
706
+ (mlp): Identity()
707
+ )
708
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
709
+ (action_parameterization): ActionParameterizationDefault(
710
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
711
+ )
712
+ )
713
+ [2023-04-29 19:05:20,571][133597] Using optimizer <class 'torch.optim.adam.Adam'>
714
+ [2023-04-29 19:05:20,572][133597] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
715
+ [2023-04-29 19:05:20,603][133597] Loading model from checkpoint
716
+ [2023-04-29 19:05:20,607][133597] Loaded experiment state at self.train_step=978, self.env_steps=4005888
717
+ [2023-04-29 19:05:20,607][133597] Initialized policy 0 weights for model version 978
718
+ [2023-04-29 19:05:20,610][133597] LearnerWorker_p0 finished initialization!
719
+ [2023-04-29 19:05:20,610][133597] Using GPUs [0] for process 0 (actually maps to GPUs [0])
720
+ [2023-04-29 19:05:20,727][133610] RunningMeanStd input shape: (3, 72, 128)
721
+ [2023-04-29 19:05:20,728][133610] RunningMeanStd input shape: (1,)
722
+ [2023-04-29 19:05:20,735][133610] ConvEncoder: input_channels=3
723
+ [2023-04-29 19:05:20,794][133610] Conv encoder output size: 512
724
+ [2023-04-29 19:05:20,794][133610] Policy head output size: 512
725
+ [2023-04-29 19:05:20,912][108205] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4005888. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
726
+ [2023-04-29 19:05:21,937][108205] Inference worker 0-0 is ready!
727
+ [2023-04-29 19:05:21,938][108205] All inference workers are ready! Signal rollout workers to start!
728
+ [2023-04-29 19:05:21,984][133613] Doom resolution: 160x120, resize resolution: (128, 72)
729
+ [2023-04-29 19:05:21,985][133618] Doom resolution: 160x120, resize resolution: (128, 72)
730
+ [2023-04-29 19:05:21,988][133615] Doom resolution: 160x120, resize resolution: (128, 72)
731
+ [2023-04-29 19:05:21,990][133611] Doom resolution: 160x120, resize resolution: (128, 72)
732
+ [2023-04-29 19:05:21,991][133612] Doom resolution: 160x120, resize resolution: (128, 72)
733
+ [2023-04-29 19:05:21,993][133617] Doom resolution: 160x120, resize resolution: (128, 72)
734
+ [2023-04-29 19:05:21,993][133616] Doom resolution: 160x120, resize resolution: (128, 72)
735
+ [2023-04-29 19:05:22,000][133614] Doom resolution: 160x120, resize resolution: (128, 72)
736
+ [2023-04-29 19:05:22,396][133611] Decorrelating experience for 0 frames...
737
+ [2023-04-29 19:05:22,396][133614] Decorrelating experience for 0 frames...
738
+ [2023-04-29 19:05:22,396][133613] Decorrelating experience for 0 frames...
739
+ [2023-04-29 19:05:22,397][133615] Decorrelating experience for 0 frames...
740
+ [2023-04-29 19:05:22,397][133612] Decorrelating experience for 0 frames...
741
+ [2023-04-29 19:05:22,398][133617] Decorrelating experience for 0 frames...
742
+ [2023-04-29 19:05:22,581][133612] Decorrelating experience for 32 frames...
743
+ [2023-04-29 19:05:22,582][133615] Decorrelating experience for 32 frames...
744
+ [2023-04-29 19:05:22,583][133613] Decorrelating experience for 32 frames...
745
+ [2023-04-29 19:05:22,616][133611] Decorrelating experience for 32 frames...
746
+ [2023-04-29 19:05:22,632][133614] Decorrelating experience for 32 frames...
747
+ [2023-04-29 19:05:22,807][133617] Decorrelating experience for 32 frames...
748
+ [2023-04-29 19:05:22,809][133616] Decorrelating experience for 0 frames...
749
+ [2023-04-29 19:05:22,836][133615] Decorrelating experience for 64 frames...
750
+ [2023-04-29 19:05:22,857][133612] Decorrelating experience for 64 frames...
751
+ [2023-04-29 19:05:22,872][133611] Decorrelating experience for 64 frames...
752
+ [2023-04-29 19:05:22,893][133618] Decorrelating experience for 0 frames...
753
+ [2023-04-29 19:05:23,027][133616] Decorrelating experience for 32 frames...
754
+ [2023-04-29 19:05:23,051][133617] Decorrelating experience for 64 frames...
755
+ [2023-04-29 19:05:23,083][133613] Decorrelating experience for 64 frames...
756
+ [2023-04-29 19:05:23,101][133612] Decorrelating experience for 96 frames...
757
+ [2023-04-29 19:05:23,280][133618] Decorrelating experience for 32 frames...
758
+ [2023-04-29 19:05:23,298][133616] Decorrelating experience for 64 frames...
759
+ [2023-04-29 19:05:23,324][133617] Decorrelating experience for 96 frames...
760
+ [2023-04-29 19:05:23,348][133613] Decorrelating experience for 96 frames...
761
+ [2023-04-29 19:05:23,508][133618] Decorrelating experience for 64 frames...
762
+ [2023-04-29 19:05:23,508][133615] Decorrelating experience for 96 frames...
763
+ [2023-04-29 19:05:23,563][133616] Decorrelating experience for 96 frames...
764
+ [2023-04-29 19:05:23,581][133614] Decorrelating experience for 64 frames...
765
+ [2023-04-29 19:05:23,773][133618] Decorrelating experience for 96 frames...
766
+ [2023-04-29 19:05:23,829][133611] Decorrelating experience for 96 frames...
767
+ [2023-04-29 19:05:23,845][133614] Decorrelating experience for 96 frames...
768
+ [2023-04-29 19:05:24,309][133597] Signal inference workers to stop experience collection...
769
+ [2023-04-29 19:05:24,312][133610] InferenceWorker_p0-w0: stopping experience collection
770
+ [2023-04-29 19:05:25,912][108205] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 507.2. Samples: 2536. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
771
+ [2023-04-29 19:05:25,912][108205] Avg episode reward: [(0, '2.019')]
772
+ [2023-04-29 19:05:26,586][133597] Signal inference workers to resume experience collection...
773
+ [2023-04-29 19:05:26,587][133610] InferenceWorker_p0-w0: resuming experience collection
774
+ [2023-04-29 19:05:26,587][133597] Stopping Batcher_0...
775
+ [2023-04-29 19:05:26,587][133597] Loop batcher_evt_loop terminating...
776
+ [2023-04-29 19:05:26,593][133613] Stopping RolloutWorker_w3...
777
+ [2023-04-29 19:05:26,593][133616] Stopping RolloutWorker_w5...
778
+ [2023-04-29 19:05:26,594][133613] Loop rollout_proc3_evt_loop terminating...
779
+ [2023-04-29 19:05:26,594][133616] Loop rollout_proc5_evt_loop terminating...
780
+ [2023-04-29 19:05:26,594][133612] Stopping RolloutWorker_w1...
781
+ [2023-04-29 19:05:26,595][133617] Stopping RolloutWorker_w6...
782
+ [2023-04-29 19:05:26,595][133615] Stopping RolloutWorker_w4...
783
+ [2023-04-29 19:05:26,595][133618] Stopping RolloutWorker_w7...
784
+ [2023-04-29 19:05:26,595][133612] Loop rollout_proc1_evt_loop terminating...
785
+ [2023-04-29 19:05:26,595][133614] Stopping RolloutWorker_w2...
786
+ [2023-04-29 19:05:26,595][133615] Loop rollout_proc4_evt_loop terminating...
787
+ [2023-04-29 19:05:26,595][133617] Loop rollout_proc6_evt_loop terminating...
788
+ [2023-04-29 19:05:26,595][133618] Loop rollout_proc7_evt_loop terminating...
789
+ [2023-04-29 19:05:26,595][133611] Stopping RolloutWorker_w0...
790
+ [2023-04-29 19:05:26,595][133614] Loop rollout_proc2_evt_loop terminating...
791
+ [2023-04-29 19:05:26,595][133611] Loop rollout_proc0_evt_loop terminating...
792
+ [2023-04-29 19:05:26,596][133610] Weights refcount: 2 0
793
+ [2023-04-29 19:05:26,597][133610] Stopping InferenceWorker_p0-w0...
794
+ [2023-04-29 19:05:26,598][133610] Loop inference_proc0-0_evt_loop terminating...
795
+ [2023-04-29 19:05:26,598][108205] Component Batcher_0 stopped!
796
+ [2023-04-29 19:05:26,601][108205] Component RolloutWorker_w3 stopped!
797
+ [2023-04-29 19:05:26,602][108205] Component RolloutWorker_w5 stopped!
798
+ [2023-04-29 19:05:26,603][108205] Component RolloutWorker_w1 stopped!
799
+ [2023-04-29 19:05:26,604][108205] Component RolloutWorker_w6 stopped!
800
+ [2023-04-29 19:05:26,604][108205] Component RolloutWorker_w4 stopped!
801
+ [2023-04-29 19:05:26,605][108205] Component RolloutWorker_w7 stopped!
802
+ [2023-04-29 19:05:26,606][108205] Component RolloutWorker_w2 stopped!
803
+ [2023-04-29 19:05:26,607][108205] Component RolloutWorker_w0 stopped!
804
+ [2023-04-29 19:05:26,607][108205] Component InferenceWorker_p0-w0 stopped!
805
+ [2023-04-29 19:05:26,740][108205] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 108205], exiting...
806
+ [2023-04-29 19:05:26,741][108205] Runner profile tree view:
807
+ main_loop: 9.2016
808
+ [2023-04-29 19:05:26,742][108205] Collected {0: 4009984}, FPS: 445.1
809
+ [2023-04-29 19:05:27,185][133597] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000980_4014080.pth...
810
+ [2023-04-29 19:05:27,217][133597] Removing /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000517_2117632.pth
811
+ [2023-04-29 19:05:27,218][133597] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000980_4014080.pth...
812
+ [2023-04-29 19:05:27,250][133597] Stopping LearnerWorker_p0...
813
+ [2023-04-29 19:05:27,250][133597] Loop learner_proc0_evt_loop terminating...
814
+ [2023-04-29 19:17:05,974][139883] Saving configuration to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json...
815
+ [2023-04-29 19:17:05,975][139883] Rollout worker 0 uses device cpu
816
+ [2023-04-29 19:17:05,975][139883] Rollout worker 1 uses device cpu
817
+ [2023-04-29 19:17:05,976][139883] Rollout worker 2 uses device cpu
818
+ [2023-04-29 19:17:05,976][139883] Rollout worker 3 uses device cpu
819
+ [2023-04-29 19:17:05,977][139883] Rollout worker 4 uses device cpu
820
+ [2023-04-29 19:17:05,977][139883] Rollout worker 5 uses device cpu
821
+ [2023-04-29 19:17:05,978][139883] Rollout worker 6 uses device cpu
822
+ [2023-04-29 19:17:05,978][139883] Rollout worker 7 uses device cpu
823
+ [2023-04-29 19:17:06,002][139883] Using GPUs [0] for process 0 (actually maps to GPUs [0])
824
+ [2023-04-29 19:17:06,002][139883] InferenceWorker_p0-w0: min num requests: 2
825
+ [2023-04-29 19:17:06,060][139883] Starting all processes...
826
+ [2023-04-29 19:17:06,061][139883] Starting process learner_proc0
827
+ [2023-04-29 19:17:06,110][139883] Starting all processes...
828
+ [2023-04-29 19:17:06,114][139883] Starting process inference_proc0-0
829
+ [2023-04-29 19:17:06,114][139883] Starting process rollout_proc0
830
+ [2023-04-29 19:17:06,114][139883] Starting process rollout_proc1
831
+ [2023-04-29 19:17:06,114][139883] Starting process rollout_proc2
832
+ [2023-04-29 19:17:06,115][139883] Starting process rollout_proc3
833
+ [2023-04-29 19:17:06,115][139883] Starting process rollout_proc4
834
+ [2023-04-29 19:17:06,115][139883] Starting process rollout_proc5
835
+ [2023-04-29 19:17:06,116][139883] Starting process rollout_proc6
836
+ [2023-04-29 19:17:06,116][139883] Starting process rollout_proc7
837
+ [2023-04-29 19:17:06,979][141009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
838
+ [2023-04-29 19:17:06,979][141009] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
839
+ [2023-04-29 19:17:06,997][141024] Worker 1 uses CPU cores [3, 4, 5]
840
+ [2023-04-29 19:17:07,016][141009] Num visible devices: 1
841
+ [2023-04-29 19:17:07,024][141026] Worker 3 uses CPU cores [9, 10, 11]
842
+ [2023-04-29 19:17:07,027][141030] Worker 7 uses CPU cores [21, 22, 23]
843
+ [2023-04-29 19:17:07,038][141028] Worker 4 uses CPU cores [12, 13, 14]
844
+ [2023-04-29 19:17:07,039][141022] Using GPUs [0] for process 0 (actually maps to GPUs [0])
845
+ [2023-04-29 19:17:07,039][141022] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
846
+ [2023-04-29 19:17:07,045][141023] Worker 0 uses CPU cores [0, 1, 2]
847
+ [2023-04-29 19:17:07,053][141022] Num visible devices: 1
848
+ [2023-04-29 19:17:07,054][141009] Starting seed is not provided
849
+ [2023-04-29 19:17:07,054][141009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
850
+ [2023-04-29 19:17:07,054][141009] Initializing actor-critic model on device cuda:0
851
+ [2023-04-29 19:17:07,054][141009] RunningMeanStd input shape: (3, 72, 128)
852
+ [2023-04-29 19:17:07,055][141009] RunningMeanStd input shape: (1,)
853
+ [2023-04-29 19:17:07,059][141027] Worker 5 uses CPU cores [15, 16, 17]
854
+ [2023-04-29 19:17:07,065][141009] ConvEncoder: input_channels=3
855
+ [2023-04-29 19:17:07,066][141025] Worker 2 uses CPU cores [6, 7, 8]
856
+ [2023-04-29 19:17:07,085][141029] Worker 6 uses CPU cores [18, 19, 20]
857
+ [2023-04-29 19:17:07,208][141009] Conv encoder output size: 512
858
+ [2023-04-29 19:17:07,208][141009] Policy head output size: 512
859
+ [2023-04-29 19:17:07,247][141009] Created Actor Critic model with architecture:
860
+ [2023-04-29 19:17:07,247][141009] ActorCriticSharedWeights(
861
+ (obs_normalizer): ObservationNormalizer(
862
+ (running_mean_std): RunningMeanStdDictInPlace(
863
+ (running_mean_std): ModuleDict(
864
+ (obs): RunningMeanStdInPlace()
865
+ )
866
+ )
867
+ )
868
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
869
+ (encoder): VizdoomEncoder(
870
+ (basic_encoder): ConvEncoder(
871
+ (enc): RecursiveScriptModule(
872
+ original_name=ConvEncoderImpl
873
+ (conv_head): RecursiveScriptModule(
874
+ original_name=Sequential
875
+ (0): RecursiveScriptModule(original_name=Conv2d)
876
+ (1): RecursiveScriptModule(original_name=ELU)
877
+ (2): RecursiveScriptModule(original_name=Conv2d)
878
+ (3): RecursiveScriptModule(original_name=ELU)
879
+ (4): RecursiveScriptModule(original_name=Conv2d)
880
+ (5): RecursiveScriptModule(original_name=ELU)
881
+ )
882
+ (mlp_layers): RecursiveScriptModule(
883
+ original_name=Sequential
884
+ (0): RecursiveScriptModule(original_name=Linear)
885
+ (1): RecursiveScriptModule(original_name=ELU)
886
+ )
887
+ )
888
+ )
889
+ )
890
+ (core): ModelCoreRNN(
891
+ (core): GRU(512, 512)
892
+ )
893
+ (decoder): MlpDecoder(
894
+ (mlp): Identity()
895
+ )
896
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
897
+ (action_parameterization): ActionParameterizationDefault(
898
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
899
+ )
900
+ )
901
+ [2023-04-29 19:17:08,982][141009] Using optimizer <class 'torch.optim.adam.Adam'>
902
+ [2023-04-29 19:17:08,983][141009] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000980_4014080.pth...
903
+ [2023-04-29 19:17:08,999][141009] Loading model from checkpoint
904
+ [2023-04-29 19:17:09,002][141009] Loaded experiment state at self.train_step=980, self.env_steps=4014080
905
+ [2023-04-29 19:17:09,002][141009] Initialized policy 0 weights for model version 980
906
+ [2023-04-29 19:17:09,005][141009] LearnerWorker_p0 finished initialization!
907
+ [2023-04-29 19:17:09,006][141009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
908
+ [2023-04-29 19:17:09,125][141022] RunningMeanStd input shape: (3, 72, 128)
909
+ [2023-04-29 19:17:09,126][141022] RunningMeanStd input shape: (1,)
910
+ [2023-04-29 19:17:09,133][141022] ConvEncoder: input_channels=3
911
+ [2023-04-29 19:17:09,191][141022] Conv encoder output size: 512
912
+ [2023-04-29 19:17:09,191][141022] Policy head output size: 512
913
+ [2023-04-29 19:17:09,422][139883] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4014080. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
914
+ [2023-04-29 19:17:10,104][139883] Inference worker 0-0 is ready!
915
+ [2023-04-29 19:17:10,104][139883] All inference workers are ready! Signal rollout workers to start!
916
+ [2023-04-29 19:17:10,122][141028] Doom resolution: 160x120, resize resolution: (128, 72)
917
+ [2023-04-29 19:17:10,123][141024] Doom resolution: 160x120, resize resolution: (128, 72)
918
+ [2023-04-29 19:17:10,123][141029] Doom resolution: 160x120, resize resolution: (128, 72)
919
+ [2023-04-29 19:17:10,123][141030] Doom resolution: 160x120, resize resolution: (128, 72)
920
+ [2023-04-29 19:17:10,123][141025] Doom resolution: 160x120, resize resolution: (128, 72)
921
+ [2023-04-29 19:17:10,124][141026] Doom resolution: 160x120, resize resolution: (128, 72)
922
+ [2023-04-29 19:17:10,124][141027] Doom resolution: 160x120, resize resolution: (128, 72)
923
+ [2023-04-29 19:17:10,124][141023] Doom resolution: 160x120, resize resolution: (128, 72)
924
+ [2023-04-29 19:17:10,323][141026] Decorrelating experience for 0 frames...
925
+ [2023-04-29 19:17:10,324][141029] Decorrelating experience for 0 frames...
926
+ [2023-04-29 19:17:10,324][141025] Decorrelating experience for 0 frames...
927
+ [2023-04-29 19:17:10,325][141023] Decorrelating experience for 0 frames...
928
+ [2023-04-29 19:17:10,327][141028] Decorrelating experience for 0 frames...
929
+ [2023-04-29 19:17:10,332][141024] Decorrelating experience for 0 frames...
930
+ [2023-04-29 19:17:10,495][141026] Decorrelating experience for 32 frames...
931
+ [2023-04-29 19:17:10,495][141025] Decorrelating experience for 32 frames...
932
+ [2023-04-29 19:17:10,519][141024] Decorrelating experience for 32 frames...
933
+ [2023-04-29 19:17:10,520][141030] Decorrelating experience for 0 frames...
934
+ [2023-04-29 19:17:10,557][141029] Decorrelating experience for 32 frames...
935
+ [2023-04-29 19:17:10,699][141026] Decorrelating experience for 64 frames...
936
+ [2023-04-29 19:17:10,700][141030] Decorrelating experience for 32 frames...
937
+ [2023-04-29 19:17:10,754][141027] Decorrelating experience for 0 frames...
938
+ [2023-04-29 19:17:10,784][141029] Decorrelating experience for 64 frames...
939
+ [2023-04-29 19:17:10,909][141023] Decorrelating experience for 32 frames...
940
+ [2023-04-29 19:17:10,909][141025] Decorrelating experience for 64 frames...
941
+ [2023-04-29 19:17:10,911][141026] Decorrelating experience for 96 frames...
942
+ [2023-04-29 19:17:10,944][141030] Decorrelating experience for 64 frames...
943
+ [2023-04-29 19:17:10,978][141027] Decorrelating experience for 32 frames...
944
+ [2023-04-29 19:17:11,007][141024] Decorrelating experience for 64 frames...
945
+ [2023-04-29 19:17:11,118][141029] Decorrelating experience for 96 frames...
946
+ [2023-04-29 19:17:11,144][141025] Decorrelating experience for 96 frames...
947
+ [2023-04-29 19:17:11,193][141030] Decorrelating experience for 96 frames...
948
+ [2023-04-29 19:17:11,214][141024] Decorrelating experience for 96 frames...
949
+ [2023-04-29 19:17:11,214][141027] Decorrelating experience for 64 frames...
950
+ [2023-04-29 19:17:11,327][141028] Decorrelating experience for 32 frames...
951
+ [2023-04-29 19:17:11,522][141027] Decorrelating experience for 96 frames...
952
+ [2023-04-29 19:17:11,550][141023] Decorrelating experience for 64 frames...
953
+ [2023-04-29 19:17:11,812][141028] Decorrelating experience for 64 frames...
954
+ [2023-04-29 19:17:11,844][141023] Decorrelating experience for 96 frames...
955
+ [2023-04-29 19:17:11,880][141009] Signal inference workers to stop experience collection...
956
+ [2023-04-29 19:17:11,883][141022] InferenceWorker_p0-w0: stopping experience collection
957
+ [2023-04-29 19:17:12,064][141028] Decorrelating experience for 96 frames...
958
+ [2023-04-29 19:17:13,307][141009] Signal inference workers to resume experience collection...
959
+ [2023-04-29 19:17:13,307][141022] InferenceWorker_p0-w0: resuming experience collection
960
+ [2023-04-29 19:17:13,308][141009] Stopping Batcher_0...
961
+ [2023-04-29 19:17:13,308][141009] Loop batcher_evt_loop terminating...
962
+ [2023-04-29 19:17:13,313][141026] Stopping RolloutWorker_w3...
963
+ [2023-04-29 19:17:13,313][141027] Stopping RolloutWorker_w5...
964
+ [2023-04-29 19:17:13,313][141023] Stopping RolloutWorker_w0...
965
+ [2023-04-29 19:17:13,313][141026] Loop rollout_proc3_evt_loop terminating...
966
+ [2023-04-29 19:17:13,313][141027] Loop rollout_proc5_evt_loop terminating...
967
+ [2023-04-29 19:17:13,313][141023] Loop rollout_proc0_evt_loop terminating...
968
+ [2023-04-29 19:17:13,313][141024] Stopping RolloutWorker_w1...
969
+ [2023-04-29 19:17:13,313][141030] Stopping RolloutWorker_w7...
970
+ [2023-04-29 19:17:13,313][141024] Loop rollout_proc1_evt_loop terminating...
971
+ [2023-04-29 19:17:13,314][141030] Loop rollout_proc7_evt_loop terminating...
972
+ [2023-04-29 19:17:13,313][141025] Stopping RolloutWorker_w2...
973
+ [2023-04-29 19:17:13,314][141025] Loop rollout_proc2_evt_loop terminating...
974
+ [2023-04-29 19:17:13,314][141028] Stopping RolloutWorker_w4...
975
+ [2023-04-29 19:17:13,314][141029] Stopping RolloutWorker_w6...
976
+ [2023-04-29 19:17:13,314][141028] Loop rollout_proc4_evt_loop terminating...
977
+ [2023-04-29 19:17:13,314][141029] Loop rollout_proc6_evt_loop terminating...
978
+ [2023-04-29 19:17:13,314][141022] Weights refcount: 2 0
979
+ [2023-04-29 19:17:13,316][141022] Stopping InferenceWorker_p0-w0...
980
+ [2023-04-29 19:17:13,316][141022] Loop inference_proc0-0_evt_loop terminating...
981
+ [2023-04-29 19:17:13,319][139883] Component Batcher_0 stopped!
982
+ [2023-04-29 19:17:13,322][139883] Component RolloutWorker_w3 stopped!
983
+ [2023-04-29 19:17:13,322][139883] Component RolloutWorker_w5 stopped!
984
+ [2023-04-29 19:17:13,323][139883] Component RolloutWorker_w0 stopped!
985
+ [2023-04-29 19:17:13,324][139883] Component RolloutWorker_w1 stopped!
986
+ [2023-04-29 19:17:13,325][139883] Component RolloutWorker_w7 stopped!
987
+ [2023-04-29 19:17:13,325][139883] Component RolloutWorker_w2 stopped!
988
+ [2023-04-29 19:17:13,326][139883] Component RolloutWorker_w4 stopped!
989
+ [2023-04-29 19:17:13,327][139883] Component RolloutWorker_w6 stopped!
990
+ [2023-04-29 19:17:13,328][139883] Component InferenceWorker_p0-w0 stopped!
991
+ [2023-04-29 19:17:13,811][141009] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000982_4022272.pth...
992
+ [2023-04-29 19:17:13,840][141009] Removing /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth
993
+ [2023-04-29 19:17:13,842][141009] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000982_4022272.pth...
994
+ [2023-04-29 19:17:13,873][141009] Stopping LearnerWorker_p0...
995
+ [2023-04-29 19:17:13,874][141009] Loop learner_proc0_evt_loop terminating...
996
+ [2023-04-29 19:17:13,874][139883] Component LearnerWorker_p0 stopped!
997
+ [2023-04-29 19:17:13,874][139883] Waiting for process learner_proc0 to stop...
998
+ [2023-04-29 19:17:14,380][139883] Waiting for process inference_proc0-0 to join...
999
+ [2023-04-29 19:17:14,381][139883] Waiting for process rollout_proc0 to join...
1000
+ [2023-04-29 19:17:14,382][139883] Waiting for process rollout_proc1 to join...
1001
+ [2023-04-29 19:17:14,382][139883] Waiting for process rollout_proc2 to join...
1002
+ [2023-04-29 19:17:14,383][139883] Waiting for process rollout_proc3 to join...
1003
+ [2023-04-29 19:17:14,383][139883] Waiting for process rollout_proc4 to join...
1004
+ [2023-04-29 19:17:14,384][139883] Waiting for process rollout_proc5 to join...
1005
+ [2023-04-29 19:17:14,385][139883] Waiting for process rollout_proc6 to join...
1006
+ [2023-04-29 19:17:14,385][139883] Waiting for process rollout_proc7 to join...
1007
+ [2023-04-29 19:17:14,386][139883] Batcher 0 profile tree view:
1008
+ batching: 0.0301, releasing_batches: 0.0006
1009
+ [2023-04-29 19:17:14,386][139883] InferenceWorker_p0-w0 profile tree view:
1010
+ update_model: 0.0040
1011
+ wait_policy: 0.0000
1012
+ wait_policy_total: 0.8549
1013
+ one_step: 0.0023
1014
+ handle_policy_step: 0.8927
1015
+ deserialize: 0.0172, stack: 0.0021, obs_to_device_normalize: 0.1624, forward: 0.5764, send_messages: 0.0347
1016
+ prepare_outputs: 0.0846
1017
+ to_cpu: 0.0663
1018
+ [2023-04-29 19:17:14,387][139883] Learner 0 profile tree view:
1019
+ misc: 0.0000, prepare_batch: 1.5413
1020
+ train: 0.6188
1021
+ epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0004, kl_divergence: 0.0005, after_optimizer: 0.0074
1022
+ calculate_losses: 0.0568
1023
+ losses_init: 0.0000, forward_head: 0.0480, bptt_initial: 0.0040, tail: 0.0008, advantages_returns: 0.0005, losses: 0.0018
1024
+ bptt: 0.0015
1025
+ bptt_forward_core: 0.0014
1026
+ update: 0.5533
1027
+ clip: 0.0038
1028
+ [2023-04-29 19:17:14,387][139883] RolloutWorker_w0 profile tree view:
1029
+ wait_for_trajectories: 0.0001, enqueue_policy_requests: 0.0003
1030
+ [2023-04-29 19:17:14,388][139883] RolloutWorker_w7 profile tree view:
1031
+ wait_for_trajectories: 0.0004, enqueue_policy_requests: 0.0197, env_step: 0.3010, overhead: 0.0222, complete_rollouts: 0.0005
1032
+ save_policy_outputs: 0.0155
1033
+ split_output_tensors: 0.0077
1034
+ [2023-04-29 19:17:14,389][139883] Loop Runner_EvtLoop terminating...
1035
+ [2023-04-29 19:17:14,389][139883] Runner profile tree view:
1036
+ main_loop: 8.3291
1037
+ [2023-04-29 19:17:14,390][139883] Collected {0: 4022272}, FPS: 983.5
1038
+ [2023-04-29 19:17:14,480][139883] Loading existing experiment configuration from /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json
1039
+ [2023-04-29 19:17:14,481][139883] Overriding arg 'num_workers' with value 1 passed from command line
1040
+ [2023-04-29 19:17:14,482][139883] Adding new argument 'no_render'=True that is not in the saved config file!
1041
+ [2023-04-29 19:17:14,482][139883] Adding new argument 'save_video'=True that is not in the saved config file!
1042
+ [2023-04-29 19:17:14,483][139883] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
1043
+ [2023-04-29 19:17:14,483][139883] Adding new argument 'video_name'=None that is not in the saved config file!
1044
+ [2023-04-29 19:17:14,484][139883] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
1045
+ [2023-04-29 19:17:14,485][139883] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
1046
+ [2023-04-29 19:17:14,485][139883] Adding new argument 'push_to_hub'=False that is not in the saved config file!
1047
+ [2023-04-29 19:17:14,486][139883] Adding new argument 'hf_repository'=None that is not in the saved config file!
1048
+ [2023-04-29 19:17:14,486][139883] Adding new argument 'policy_index'=0 that is not in the saved config file!
1049
+ [2023-04-29 19:17:14,487][139883] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
1050
+ [2023-04-29 19:17:14,487][139883] Adding new argument 'train_script'=None that is not in the saved config file!
1051
+ [2023-04-29 19:17:14,488][139883] Adding new argument 'enjoy_script'=None that is not in the saved config file!
1052
+ [2023-04-29 19:17:14,488][139883] Using frameskip 1 and render_action_repeat=4 for evaluation
1053
+ [2023-04-29 19:17:14,494][139883] Doom resolution: 160x120, resize resolution: (128, 72)
1054
+ [2023-04-29 19:17:14,495][139883] RunningMeanStd input shape: (3, 72, 128)
1055
+ [2023-04-29 19:17:14,496][139883] RunningMeanStd input shape: (1,)
1056
+ [2023-04-29 19:17:14,503][139883] ConvEncoder: input_channels=3
1057
+ [2023-04-29 19:17:14,588][139883] Conv encoder output size: 512
1058
+ [2023-04-29 19:17:14,589][139883] Policy head output size: 512
1059
+ [2023-04-29 19:17:16,265][139883] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000982_4022272.pth...
1060
+ [2023-04-29 19:17:17,030][139883] Num frames 100...
1061
+ [2023-04-29 19:17:17,120][139883] Num frames 200...
1062
+ [2023-04-29 19:17:17,212][139883] Num frames 300...
1063
+ [2023-04-29 19:17:17,344][139883] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
1064
+ [2023-04-29 19:17:17,345][139883] Avg episode reward: 3.840, avg true_objective: 3.840
1065
+ [2023-04-29 19:17:17,361][139883] Num frames 400...
1066
+ [2023-04-29 19:17:17,454][139883] Num frames 500...
1067
+ [2023-04-29 19:17:17,541][139883] Num frames 600...
1068
+ [2023-04-29 19:17:17,628][139883] Num frames 700...
1069
+ [2023-04-29 19:17:17,715][139883] Num frames 800...
1070
+ [2023-04-29 19:17:17,815][139883] Num frames 900...
1071
+ [2023-04-29 19:17:17,902][139883] Avg episode rewards: #0: 5.640, true rewards: #0: 4.640
1072
+ [2023-04-29 19:17:17,903][139883] Avg episode reward: 5.640, avg true_objective: 4.640
1073
+ [2023-04-29 19:17:17,978][139883] Num frames 1000...
1074
+ [2023-04-29 19:17:18,072][139883] Num frames 1100...
1075
+ [2023-04-29 19:17:18,162][139883] Num frames 1200...
1076
+ [2023-04-29 19:17:18,256][139883] Num frames 1300...
1077
+ [2023-04-29 19:17:18,380][139883] Avg episode rewards: #0: 5.587, true rewards: #0: 4.587
1078
+ [2023-04-29 19:17:18,381][139883] Avg episode reward: 5.587, avg true_objective: 4.587
1079
+ [2023-04-29 19:17:18,406][139883] Num frames 1400...
1080
+ [2023-04-29 19:17:18,494][139883] Num frames 1500...
1081
+ [2023-04-29 19:17:18,586][139883] Num frames 1600...
1082
+ [2023-04-29 19:17:18,682][139883] Num frames 1700...
1083
+ [2023-04-29 19:17:18,797][139883] Avg episode rewards: #0: 5.150, true rewards: #0: 4.400
1084
+ [2023-04-29 19:17:18,797][139883] Avg episode reward: 5.150, avg true_objective: 4.400
1085
+ [2023-04-29 19:17:18,838][139883] Num frames 1800...
1086
+ [2023-04-29 19:17:18,947][139883] Num frames 1900...
1087
+ [2023-04-29 19:17:19,053][139883] Num frames 2000...
1088
+ [2023-04-29 19:17:19,150][139883] Num frames 2100...
1089
+ [2023-04-29 19:17:19,247][139883] Avg episode rewards: #0: 4.888, true rewards: #0: 4.288
1090
+ [2023-04-29 19:17:19,248][139883] Avg episode reward: 4.888, avg true_objective: 4.288
1091
+ [2023-04-29 19:17:19,305][139883] Num frames 2200...
1092
+ [2023-04-29 19:17:19,397][139883] Num frames 2300...
1093
+ [2023-04-29 19:17:19,486][139883] Num frames 2400...
1094
+ [2023-04-29 19:17:19,573][139883] Num frames 2500...
1095
+ [2023-04-29 19:17:19,653][139883] Avg episode rewards: #0: 4.713, true rewards: #0: 4.213
1096
+ [2023-04-29 19:17:19,654][139883] Avg episode reward: 4.713, avg true_objective: 4.213
1097
+ [2023-04-29 19:17:19,722][139883] Num frames 2600...
1098
+ [2023-04-29 19:17:19,815][139883] Num frames 2700...
1099
+ [2023-04-29 19:17:19,907][139883] Num frames 2800...
1100
+ [2023-04-29 19:17:20,017][139883] Num frames 2900...
1101
+ [2023-04-29 19:17:20,095][139883] Avg episode rewards: #0: 4.589, true rewards: #0: 4.160
1102
+ [2023-04-29 19:17:20,096][139883] Avg episode reward: 4.589, avg true_objective: 4.160
1103
+ [2023-04-29 19:17:20,184][139883] Num frames 3000...
1104
+ [2023-04-29 19:17:20,285][139883] Num frames 3100...
1105
+ [2023-04-29 19:17:20,374][139883] Num frames 3200...
1106
+ [2023-04-29 19:17:20,466][139883] Num frames 3300...
1107
+ [2023-04-29 19:17:20,601][139883] Avg episode rewards: #0: 4.865, true rewards: #0: 4.240
1108
+ [2023-04-29 19:17:20,602][139883] Avg episode reward: 4.865, avg true_objective: 4.240
1109
+ [2023-04-29 19:17:20,610][139883] Num frames 3400...
1110
+ [2023-04-29 19:17:20,698][139883] Num frames 3500...
1111
+ [2023-04-29 19:17:20,787][139883] Num frames 3600...
1112
+ [2023-04-29 19:17:20,883][139883] Num frames 3700...
1113
+ [2023-04-29 19:17:20,973][139883] Num frames 3800...
1114
+ [2023-04-29 19:17:21,035][139883] Avg episode rewards: #0: 4.898, true rewards: #0: 4.231
1115
+ [2023-04-29 19:17:21,036][139883] Avg episode reward: 4.898, avg true_objective: 4.231
1116
+ [2023-04-29 19:17:21,129][139883] Num frames 3900...
1117
+ [2023-04-29 19:17:21,226][139883] Num frames 4000...
1118
+ [2023-04-29 19:17:21,322][139883] Num frames 4100...
1119
+ [2023-04-29 19:17:21,464][139883] Avg episode rewards: #0: 4.992, true rewards: #0: 4.192
1120
+ [2023-04-29 19:17:21,465][139883] Avg episode reward: 4.992, avg true_objective: 4.192
1121
+ [2023-04-29 19:17:26,026][139883] Replay video saved to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/replay.mp4!
1122
+ [2023-04-29 19:19:04,743][139883] Loading existing experiment configuration from /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json
1123
+ [2023-04-29 19:19:04,743][139883] Overriding arg 'num_workers' with value 1 passed from command line
1124
+ [2023-04-29 19:19:04,744][139883] Adding new argument 'no_render'=True that is not in the saved config file!
1125
+ [2023-04-29 19:19:04,744][139883] Adding new argument 'save_video'=True that is not in the saved config file!
1126
+ [2023-04-29 19:19:04,745][139883] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
1127
+ [2023-04-29 19:19:04,746][139883] Adding new argument 'video_name'=None that is not in the saved config file!
1128
+ [2023-04-29 19:19:04,746][139883] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
1129
+ [2023-04-29 19:19:04,747][139883] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
1130
+ [2023-04-29 19:19:04,747][139883] Adding new argument 'push_to_hub'=True that is not in the saved config file!
1131
+ [2023-04-29 19:19:04,748][139883] Adding new argument 'hf_repository'='ItchyB/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
1132
+ [2023-04-29 19:19:04,748][139883] Adding new argument 'policy_index'=0 that is not in the saved config file!
1133
+ [2023-04-29 19:19:04,749][139883] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
1134
+ [2023-04-29 19:19:04,750][139883] Adding new argument 'train_script'=None that is not in the saved config file!
1135
+ [2023-04-29 19:19:04,750][139883] Adding new argument 'enjoy_script'=None that is not in the saved config file!
1136
+ [2023-04-29 19:19:04,751][139883] Using frameskip 1 and render_action_repeat=4 for evaluation
1137
+ [2023-04-29 19:19:04,754][139883] RunningMeanStd input shape: (3, 72, 128)
1138
+ [2023-04-29 19:19:04,755][139883] RunningMeanStd input shape: (1,)
1139
+ [2023-04-29 19:19:04,761][139883] ConvEncoder: input_channels=3
1140
+ [2023-04-29 19:19:04,782][139883] Conv encoder output size: 512
1141
+ [2023-04-29 19:19:04,783][139883] Policy head output size: 512
1142
+ [2023-04-29 19:19:04,801][139883] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000982_4022272.pth...
1143
+ [2023-04-29 19:19:05,173][139883] Num frames 100...
1144
+ [2023-04-29 19:19:05,315][139883] Num frames 200...
1145
+ [2023-04-29 19:19:05,452][139883] Num frames 300...
1146
+ [2023-04-29 19:19:05,634][139883] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
1147
+ [2023-04-29 19:19:05,635][139883] Avg episode reward: 3.840, avg true_objective: 3.840
1148
+ [2023-04-29 19:19:05,658][139883] Num frames 400...
1149
+ [2023-04-29 19:19:05,797][139883] Num frames 500...
1150
+ [2023-04-29 19:19:05,955][139883] Num frames 600...
1151
+ [2023-04-29 19:19:06,130][139883] Num frames 700...
1152
+ [2023-04-29 19:19:06,262][139883] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
1153
+ [2023-04-29 19:19:06,263][139883] Avg episode reward: 3.840, avg true_objective: 3.840
1154
+ [2023-04-29 19:19:06,308][139883] Num frames 800...
1155
+ [2023-04-29 19:19:06,440][139883] Num frames 900...
1156
+ [2023-04-29 19:19:06,579][139883] Num frames 1000...
1157
+ [2023-04-29 19:19:06,697][139883] Num frames 1100...
1158
+ [2023-04-29 19:19:06,855][139883] Avg episode rewards: #0: 3.947, true rewards: #0: 3.947
1159
+ [2023-04-29 19:19:06,856][139883] Avg episode reward: 3.947, avg true_objective: 3.947
1160
+ [2023-04-29 19:19:06,874][139883] Num frames 1200...
1161
+ [2023-04-29 19:19:06,994][139883] Num frames 1300...
1162
+ [2023-04-29 19:19:07,103][139883] Num frames 1400...
1163
+ [2023-04-29 19:19:07,216][139883] Num frames 1500...
1164
+ [2023-04-29 19:19:07,344][139883] Avg episode rewards: #0: 3.920, true rewards: #0: 3.920
1165
+ [2023-04-29 19:19:07,345][139883] Avg episode reward: 3.920, avg true_objective: 3.920
1166
+ [2023-04-29 19:19:07,381][139883] Num frames 1600...
1167
+ [2023-04-29 19:19:07,495][139883] Num frames 1700...
1168
+ [2023-04-29 19:19:07,612][139883] Num frames 1800...
1169
+ [2023-04-29 19:19:07,728][139883] Num frames 1900...
1170
+ [2023-04-29 19:19:07,843][139883] Avg episode rewards: #0: 3.904, true rewards: #0: 3.904
1171
+ [2023-04-29 19:19:07,844][139883] Avg episode reward: 3.904, avg true_objective: 3.904
1172
+ [2023-04-29 19:19:07,913][139883] Num frames 2000...
1173
+ [2023-04-29 19:19:08,031][139883] Num frames 2100...
1174
+ [2023-04-29 19:19:08,140][139883] Num frames 2200...
1175
+ [2023-04-29 19:19:08,252][139883] Num frames 2300...
1176
+ [2023-04-29 19:19:08,373][139883] Num frames 2400...
1177
+ [2023-04-29 19:19:08,531][139883] Avg episode rewards: #0: 4.493, true rewards: #0: 4.160
1178
+ [2023-04-29 19:19:08,532][139883] Avg episode reward: 4.493, avg true_objective: 4.160
1179
+ [2023-04-29 19:19:08,539][139883] Num frames 2500...
1180
+ [2023-04-29 19:19:08,658][139883] Num frames 2600...
1181
+ [2023-04-29 19:19:08,778][139883] Num frames 2700...
1182
+ [2023-04-29 19:19:08,915][139883] Num frames 2800...
1183
+ [2023-04-29 19:19:09,064][139883] Avg episode rewards: #0: 4.400, true rewards: #0: 4.114
1184
+ [2023-04-29 19:19:09,064][139883] Avg episode reward: 4.400, avg true_objective: 4.114
1185
+ [2023-04-29 19:19:09,089][139883] Num frames 2900...
1186
+ [2023-04-29 19:19:09,216][139883] Num frames 3000...
1187
+ [2023-04-29 19:19:09,334][139883] Num frames 3100...
1188
+ [2023-04-29 19:19:09,449][139883] Num frames 3200...
1189
+ [2023-04-29 19:19:09,580][139883] Num frames 3300...
1190
+ [2023-04-29 19:19:09,671][139883] Avg episode rewards: #0: 4.535, true rewards: #0: 4.160
1191
+ [2023-04-29 19:19:09,671][139883] Avg episode reward: 4.535, avg true_objective: 4.160
1192
+ [2023-04-29 19:19:09,763][139883] Num frames 3400...
1193
+ [2023-04-29 19:19:09,885][139883] Num frames 3500...
1194
+ [2023-04-29 19:19:10,010][139883] Num frames 3600...
1195
+ [2023-04-29 19:19:10,129][139883] Num frames 3700...
1196
+ [2023-04-29 19:19:10,196][139883] Avg episode rewards: #0: 4.458, true rewards: #0: 4.124
1197
+ [2023-04-29 19:19:10,197][139883] Avg episode reward: 4.458, avg true_objective: 4.124
1198
+ [2023-04-29 19:19:10,307][139883] Num frames 3800...
1199
+ [2023-04-29 19:19:10,426][139883] Num frames 3900...
1200
+ [2023-04-29 19:19:10,533][139883] Num frames 4000...
1201
+ [2023-04-29 19:19:10,685][139883] Avg episode rewards: #0: 4.396, true rewards: #0: 4.096
1202
+ [2023-04-29 19:19:10,686][139883] Avg episode reward: 4.396, avg true_objective: 4.096
1203
+ [2023-04-29 19:19:15,388][139883] Replay video saved to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/replay.mp4!