ItchyB committed on
Commit
3a0c27d
1 Parent(s): f34f522

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1682648908.CAPTAIN-AMERICA ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b993fd09d818e80b3179fe4de0062329a95ce69e5abb72456a2f2957911d2f04
3
+ size 203043
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ model-index:
8
+ - name: APPO
9
+ results:
10
+ - task:
11
+ type: reinforcement-learning
12
+ name: reinforcement-learning
13
+ dataset:
14
+ name: doom_health_gathering_supreme
15
+ type: doom_health_gathering_supreme
16
+ metrics:
17
+ - type: mean_reward
18
+ value: 3.90 +/- 0.60
19
+ name: mean_reward
20
+ verified: false
21
+ ---
22
+
23
+ An **APPO** model trained on the **doom_health_gathering_supreme** environment.
24
+
25
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
27
+
28
+
29
+ ## Downloading the model
30
+
31
+ After installing Sample-Factory, download the model with:
32
+ ```
33
+ python -m sample_factory.huggingface.load_from_hub -r ItchyB/rl_course_vizdoom_health_gathering_supreme
34
+ ```
35
+
36
+
37
+ ## Using the model
38
+
39
+ To run the model after download, use the `enjoy` script corresponding to this environment:
40
+ ```
41
+ python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
42
+ ```
43
+
44
+
45
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
46
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
47
+
48
+ ## Training with this model
49
+
50
+ To continue training with this model, use the `train` script corresponding to this environment:
51
+ ```
52
+ python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
53
+ ```
54
+
55
+ Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.
56
+
checkpoint_p0/best_000000280_1146880_reward_4.759.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcaaf9e66ed8a5018842b94744820c9151474166439b62b504424a4d88d7d11c
3
+ size 34928156
checkpoint_p0/checkpoint_000000517_2117632.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe0c59bb3cc2ce57a2de7b60b6b68f82fe821daea8347c180125a1c83e633479
3
+ size 34928156
checkpoint_p0/checkpoint_000000978_4005888.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c041d4c8f7a9b660a19117b7299ccba70cefb4725d2466c1564aad4956ef249
3
+ size 34928156
config.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "doom_health_gathering_supreme",
5
+ "experiment": "default_experiment",
6
+ "train_dir": "/home/byron/projects/rl-learning-course/unit-08/train_dir",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": null,
10
+ "num_policies": 1,
11
+ "async_rl": true,
12
+ "serial_mode": false,
13
+ "batched_sampling": false,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 2,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 8,
19
+ "num_envs_per_worker": 4,
20
+ "batch_size": 1024,
21
+ "num_batches_per_epoch": 1,
22
+ "num_epochs": 1,
23
+ "rollout": 32,
24
+ "recurrence": 32,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 1.0,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": false,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.001,
32
+ "value_loss_coeff": 0.5,
33
+ "kl_loss_coeff": 0.0,
34
+ "exploration_loss": "symmetric_kl",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.1,
37
+ "ppo_clip_value": 0.2,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 4.0,
46
+ "learning_rate": 0.0001,
47
+ "lr_schedule": "constant",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "lr_adaptive_min": 1e-06,
50
+ "lr_adaptive_max": 0.01,
51
+ "obs_subtract_mean": 0.0,
52
+ "obs_scale": 255.0,
53
+ "normalize_input": true,
54
+ "normalize_input_keys": null,
55
+ "decorrelate_experience_max_seconds": 0,
56
+ "decorrelate_envs_on_one_worker": true,
57
+ "actor_worker_gpus": [],
58
+ "set_workers_cpu_affinity": true,
59
+ "force_envs_single_thread": false,
60
+ "default_niceness": 0,
61
+ "log_to_file": true,
62
+ "experiment_summaries_interval": 10,
63
+ "flush_summaries_interval": 30,
64
+ "stats_avg": 100,
65
+ "summaries_use_frameskip": true,
66
+ "heartbeat_interval": 20,
67
+ "heartbeat_reporting_interval": 600,
68
+ "train_for_env_steps": 4000000,
69
+ "train_for_seconds": 10000000000,
70
+ "save_every_sec": 120,
71
+ "keep_checkpoints": 2,
72
+ "load_checkpoint_kind": "latest",
73
+ "save_milestones_sec": -1,
74
+ "save_best_every_sec": 5,
75
+ "save_best_metric": "reward",
76
+ "save_best_after": 100000,
77
+ "benchmark": false,
78
+ "encoder_mlp_layers": [
79
+ 512,
80
+ 512
81
+ ],
82
+ "encoder_conv_architecture": "convnet_simple",
83
+ "encoder_conv_mlp_layers": [
84
+ 512
85
+ ],
86
+ "use_rnn": true,
87
+ "rnn_size": 512,
88
+ "rnn_type": "gru",
89
+ "rnn_num_layers": 1,
90
+ "decoder_mlp_layers": [],
91
+ "nonlinearity": "elu",
92
+ "policy_initialization": "orthogonal",
93
+ "policy_init_gain": 1.0,
94
+ "actor_critic_share_weights": true,
95
+ "adaptive_stddev": true,
96
+ "continuous_tanh_scale": 0.0,
97
+ "initial_stddev": 1.0,
98
+ "use_env_info_cache": false,
99
+ "env_gpu_actions": false,
100
+ "env_gpu_observations": true,
101
+ "env_frameskip": 4,
102
+ "env_framestack": 1,
103
+ "pixel_format": "CHW",
104
+ "use_record_episode_statistics": false,
105
+ "with_wandb": false,
106
+ "wandb_user": null,
107
+ "wandb_project": "sample_factory",
108
+ "wandb_group": null,
109
+ "wandb_job_type": "SF",
110
+ "wandb_tags": [],
111
+ "with_pbt": false,
112
+ "pbt_mix_policies_in_one_env": true,
113
+ "pbt_period_env_steps": 5000000,
114
+ "pbt_start_mutation": 20000000,
115
+ "pbt_replace_fraction": 0.3,
116
+ "pbt_mutation_rate": 0.15,
117
+ "pbt_replace_reward_gap": 0.1,
118
+ "pbt_replace_reward_gap_absolute": 1e-06,
119
+ "pbt_optimize_gamma": false,
120
+ "pbt_target_objective": "true_objective",
121
+ "pbt_perturb_min": 1.1,
122
+ "pbt_perturb_max": 1.5,
123
+ "num_agents": -1,
124
+ "num_humans": 0,
125
+ "num_bots": -1,
126
+ "start_bot_difficulty": null,
127
+ "timelimit": null,
128
+ "res_w": 128,
129
+ "res_h": 72,
130
+ "wide_aspect_ratio": false,
131
+ "eval_env_frameskip": 1,
132
+ "fps": 35,
133
+ "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000",
134
+ "cli_args": {
135
+ "env": "doom_health_gathering_supreme",
136
+ "num_workers": 8,
137
+ "num_envs_per_worker": 4,
138
+ "train_for_env_steps": 4000000
139
+ },
140
+ "git_hash": "unknown",
141
+ "git_repo_name": "not a git repository"
142
+ }
replay.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1acb924a30b27425c5dcc73ae4ba44ec05f28cea902cafff951ef665970113
3
+ size 5397393
sf_log.txt ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-04-27 22:28:30,298][19320] Saving configuration to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json...
2
+ [2023-04-27 22:28:30,299][19320] Rollout worker 0 uses device cpu
3
+ [2023-04-27 22:28:30,300][19320] Rollout worker 1 uses device cpu
4
+ [2023-04-27 22:28:30,300][19320] Rollout worker 2 uses device cpu
5
+ [2023-04-27 22:28:30,302][19320] Rollout worker 3 uses device cpu
6
+ [2023-04-27 22:28:30,302][19320] Rollout worker 4 uses device cpu
7
+ [2023-04-27 22:28:30,303][19320] Rollout worker 5 uses device cpu
8
+ [2023-04-27 22:28:30,304][19320] Rollout worker 6 uses device cpu
9
+ [2023-04-27 22:28:30,304][19320] Rollout worker 7 uses device cpu
10
+ [2023-04-27 22:28:30,345][19320] Using GPUs [0] for process 0 (actually maps to GPUs [0])
11
+ [2023-04-27 22:28:30,345][19320] InferenceWorker_p0-w0: min num requests: 2
12
+ [2023-04-27 22:28:30,363][19320] Starting all processes...
13
+ [2023-04-27 22:28:30,364][19320] Starting process learner_proc0
14
+ [2023-04-27 22:28:30,489][19320] Starting all processes...
15
+ [2023-04-27 22:28:30,494][19320] Starting process inference_proc0-0
16
+ [2023-04-27 22:28:30,494][19320] Starting process rollout_proc0
17
+ [2023-04-27 22:28:30,495][19320] Starting process rollout_proc1
18
+ [2023-04-27 22:28:30,495][19320] Starting process rollout_proc2
19
+ [2023-04-27 22:28:30,496][19320] Starting process rollout_proc3
20
+ [2023-04-27 22:28:30,496][19320] Starting process rollout_proc4
21
+ [2023-04-27 22:28:30,496][19320] Starting process rollout_proc5
22
+ [2023-04-27 22:28:30,497][19320] Starting process rollout_proc6
23
+ [2023-04-27 22:28:30,497][19320] Starting process rollout_proc7
24
+ [2023-04-27 22:28:31,380][26612] Using GPUs [0] for process 0 (actually maps to GPUs [0])
25
+ [2023-04-27 22:28:31,380][26612] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
26
+ [2023-04-27 22:28:31,403][26612] Num visible devices: 1
27
+ [2023-04-27 22:28:31,424][26630] Worker 3 uses CPU cores [9, 10, 11]
28
+ [2023-04-27 22:28:31,427][26612] Starting seed is not provided
29
+ [2023-04-27 22:28:31,427][26612] Using GPUs [0] for process 0 (actually maps to GPUs [0])
30
+ [2023-04-27 22:28:31,427][26612] Initializing actor-critic model on device cuda:0
31
+ [2023-04-27 22:28:31,427][26612] RunningMeanStd input shape: (3, 72, 128)
32
+ [2023-04-27 22:28:31,428][26612] RunningMeanStd input shape: (1,)
33
+ [2023-04-27 22:28:31,430][26629] Worker 2 uses CPU cores [6, 7, 8]
34
+ [2023-04-27 22:28:31,434][26628] Worker 1 uses CPU cores [3, 4, 5]
35
+ [2023-04-27 22:28:31,439][26612] ConvEncoder: input_channels=3
36
+ [2023-04-27 22:28:31,444][26638] Worker 7 uses CPU cores [21, 22, 23]
37
+ [2023-04-27 22:28:31,455][26626] Worker 0 uses CPU cores [0, 1, 2]
38
+ [2023-04-27 22:28:31,457][26632] Worker 5 uses CPU cores [15, 16, 17]
39
+ [2023-04-27 22:28:31,467][26631] Worker 4 uses CPU cores [12, 13, 14]
40
+ [2023-04-27 22:28:31,474][26627] Using GPUs [0] for process 0 (actually maps to GPUs [0])
41
+ [2023-04-27 22:28:31,474][26627] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
42
+ [2023-04-27 22:28:31,481][26657] Worker 6 uses CPU cores [18, 19, 20]
43
+ [2023-04-27 22:28:31,507][26627] Num visible devices: 1
44
+ [2023-04-27 22:28:31,580][26612] Conv encoder output size: 512
45
+ [2023-04-27 22:28:31,581][26612] Policy head output size: 512
46
+ [2023-04-27 22:28:31,603][26612] Created Actor Critic model with architecture:
47
+ [2023-04-27 22:28:31,603][26612] ActorCriticSharedWeights(
48
+ (obs_normalizer): ObservationNormalizer(
49
+ (running_mean_std): RunningMeanStdDictInPlace(
50
+ (running_mean_std): ModuleDict(
51
+ (obs): RunningMeanStdInPlace()
52
+ )
53
+ )
54
+ )
55
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
56
+ (encoder): VizdoomEncoder(
57
+ (basic_encoder): ConvEncoder(
58
+ (enc): RecursiveScriptModule(
59
+ original_name=ConvEncoderImpl
60
+ (conv_head): RecursiveScriptModule(
61
+ original_name=Sequential
62
+ (0): RecursiveScriptModule(original_name=Conv2d)
63
+ (1): RecursiveScriptModule(original_name=ELU)
64
+ (2): RecursiveScriptModule(original_name=Conv2d)
65
+ (3): RecursiveScriptModule(original_name=ELU)
66
+ (4): RecursiveScriptModule(original_name=Conv2d)
67
+ (5): RecursiveScriptModule(original_name=ELU)
68
+ )
69
+ (mlp_layers): RecursiveScriptModule(
70
+ original_name=Sequential
71
+ (0): RecursiveScriptModule(original_name=Linear)
72
+ (1): RecursiveScriptModule(original_name=ELU)
73
+ )
74
+ )
75
+ )
76
+ )
77
+ (core): ModelCoreRNN(
78
+ (core): GRU(512, 512)
79
+ )
80
+ (decoder): MlpDecoder(
81
+ (mlp): Identity()
82
+ )
83
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
84
+ (action_parameterization): ActionParameterizationDefault(
85
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
86
+ )
87
+ )
88
+ [2023-04-27 22:28:33,372][26612] Using optimizer <class 'torch.optim.adam.Adam'>
89
+ [2023-04-27 22:28:33,373][26612] No checkpoints found
90
+ [2023-04-27 22:28:33,373][26612] Did not load from checkpoint, starting from scratch!
91
+ [2023-04-27 22:28:33,373][26612] Initialized policy 0 weights for model version 0
92
+ [2023-04-27 22:28:33,376][26612] LearnerWorker_p0 finished initialization!
93
+ [2023-04-27 22:28:33,376][26612] Using GPUs [0] for process 0 (actually maps to GPUs [0])
94
+ [2023-04-27 22:28:33,815][19320] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
95
+ [2023-04-27 22:28:34,070][26627] RunningMeanStd input shape: (3, 72, 128)
96
+ [2023-04-27 22:28:34,071][26627] RunningMeanStd input shape: (1,)
97
+ [2023-04-27 22:28:34,079][26627] ConvEncoder: input_channels=3
98
+ [2023-04-27 22:28:34,158][26627] Conv encoder output size: 512
99
+ [2023-04-27 22:28:34,159][26627] Policy head output size: 512
100
+ [2023-04-27 22:28:34,896][19320] Inference worker 0-0 is ready!
101
+ [2023-04-27 22:28:34,897][19320] All inference workers are ready! Signal rollout workers to start!
102
+ [2023-04-27 22:28:34,912][26657] Doom resolution: 160x120, resize resolution: (128, 72)
103
+ [2023-04-27 22:28:34,913][26630] Doom resolution: 160x120, resize resolution: (128, 72)
104
+ [2023-04-27 22:28:34,914][26638] Doom resolution: 160x120, resize resolution: (128, 72)
105
+ [2023-04-27 22:28:34,914][26629] Doom resolution: 160x120, resize resolution: (128, 72)
106
+ [2023-04-27 22:28:34,914][26626] Doom resolution: 160x120, resize resolution: (128, 72)
107
+ [2023-04-27 22:28:34,915][26632] Doom resolution: 160x120, resize resolution: (128, 72)
108
+ [2023-04-27 22:28:34,915][26631] Doom resolution: 160x120, resize resolution: (128, 72)
109
+ [2023-04-27 22:28:34,915][26628] Doom resolution: 160x120, resize resolution: (128, 72)
110
+ [2023-04-27 22:28:34,949][26628] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process...
111
+ [2023-04-27 22:28:34,950][26628] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=()
112
+ Traceback (most recent call last):
113
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 228, in _game_init
114
+ self.game.init()
115
+ vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.
116
+
117
+ During handling of the above exception, another exception occurred:
118
+
119
+ Traceback (most recent call last):
120
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal
121
+ slot_callable(*args)
122
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 150, in init
123
+ env_runner.init(self.timing)
124
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 418, in init
125
+ self._reset()
126
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 430, in _reset
127
+ observations, info = e.reset(seed=seed) # new way of doing seeding since Gym 0.26.0
128
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/gym/core.py", line 323, in reset
129
+ return self.env.reset(**kwargs)
130
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 125, in reset
131
+ obs, info = self.env.reset(**kwargs)
132
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 110, in reset
133
+ obs, info = self.env.reset(**kwargs)
134
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 30, in reset
135
+ return self.env.reset(**kwargs)
136
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/gym/core.py", line 379, in reset
137
+ obs, info = self.env.reset(**kwargs)
138
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sample_factory/envs/env_wrappers.py", line 84, in reset
139
+ obs, info = self.env.reset(**kwargs)
140
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/gym/core.py", line 323, in reset
141
+ return self.env.reset(**kwargs)
142
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 51, in reset
143
+ return self.env.reset(**kwargs)
144
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 323, in reset
145
+ self._ensure_initialized()
146
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 274, in _ensure_initialized
147
+ self.initialize()
148
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 269, in initialize
149
+ self._game_init()
150
+ File "/home/byron/miniconda3/envs/ml-agents/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 244, in _game_init
151
+ raise EnvCriticalError()
152
+ sample_factory.envs.env_utils.EnvCriticalError
153
+ [2023-04-27 22:28:34,951][26628] Unhandled exception in evt loop rollout_proc1_evt_loop
154
+ [2023-04-27 22:28:35,135][26629] Decorrelating experience for 0 frames...
155
+ [2023-04-27 22:28:35,135][26630] Decorrelating experience for 0 frames...
156
+ [2023-04-27 22:28:35,135][26657] Decorrelating experience for 0 frames...
157
+ [2023-04-27 22:28:35,141][26631] Decorrelating experience for 0 frames...
158
+ [2023-04-27 22:28:35,326][26629] Decorrelating experience for 32 frames...
159
+ [2023-04-27 22:28:35,327][26631] Decorrelating experience for 32 frames...
160
+ [2023-04-27 22:28:35,361][26630] Decorrelating experience for 32 frames...
161
+ [2023-04-27 22:28:35,399][26632] Decorrelating experience for 0 frames...
162
+ [2023-04-27 22:28:35,410][26626] Decorrelating experience for 0 frames...
163
+ [2023-04-27 22:28:35,566][26631] Decorrelating experience for 64 frames...
164
+ [2023-04-27 22:28:35,567][26657] Decorrelating experience for 32 frames...
165
+ [2023-04-27 22:28:35,596][26632] Decorrelating experience for 32 frames...
166
+ [2023-04-27 22:28:35,596][26629] Decorrelating experience for 64 frames...
167
+ [2023-04-27 22:28:35,597][26630] Decorrelating experience for 64 frames...
168
+ [2023-04-27 22:28:35,783][26631] Decorrelating experience for 96 frames...
169
+ [2023-04-27 22:28:35,784][26657] Decorrelating experience for 64 frames...
170
+ [2023-04-27 22:28:35,825][26626] Decorrelating experience for 32 frames...
171
+ [2023-04-27 22:28:35,826][26629] Decorrelating experience for 96 frames...
172
+ [2023-04-27 22:28:35,860][26630] Decorrelating experience for 96 frames...
173
+ [2023-04-27 22:28:36,028][26626] Decorrelating experience for 64 frames...
174
+ [2023-04-27 22:28:36,029][26638] Decorrelating experience for 0 frames...
175
+ [2023-04-27 22:28:36,236][26638] Decorrelating experience for 32 frames...
176
+ [2023-04-27 22:28:36,278][26626] Decorrelating experience for 96 frames...
177
+ [2023-04-27 22:28:36,483][26638] Decorrelating experience for 64 frames...
178
+ [2023-04-27 22:28:36,526][26657] Decorrelating experience for 96 frames...
179
+ [2023-04-27 22:28:36,742][26638] Decorrelating experience for 96 frames...
180
+ [2023-04-27 22:28:36,763][26632] Decorrelating experience for 64 frames...
181
+ [2023-04-27 22:28:37,009][26632] Decorrelating experience for 96 frames...
182
+ [2023-04-27 22:28:38,609][26612] Signal inference workers to stop experience collection...
183
+ [2023-04-27 22:28:38,611][26627] InferenceWorker_p0-w0: stopping experience collection
184
+ [2023-04-27 22:28:38,815][19320] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 130.4. Samples: 652. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
185
+ [2023-04-27 22:28:38,816][19320] Avg episode reward: [(0, '2.566')]
186
+ [2023-04-27 22:28:40,413][26612] Signal inference workers to resume experience collection...
187
+ [2023-04-27 22:28:40,414][26627] InferenceWorker_p0-w0: resuming experience collection
188
+ [2023-04-27 22:28:43,469][26627] Updated weights for policy 0, policy_version 10 (0.0582)
189
+ [2023-04-27 22:28:43,815][19320] Fps is (10 sec: 4505.6, 60 sec: 4505.6, 300 sec: 4505.6). Total num frames: 45056. Throughput: 0: 307.2. Samples: 3072. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
190
+ [2023-04-27 22:28:43,816][19320] Avg episode reward: [(0, '4.236')]
191
+ [2023-04-27 22:28:46,007][26627] Updated weights for policy 0, policy_version 20 (0.0008)
192
+ [2023-04-27 22:28:47,944][26627] Updated weights for policy 0, policy_version 30 (0.0012)
193
+ [2023-04-27 22:28:48,815][19320] Fps is (10 sec: 13926.4, 60 sec: 9284.3, 300 sec: 9284.3). Total num frames: 139264. Throughput: 0: 1720.3. Samples: 25804. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
194
+ [2023-04-27 22:28:48,815][19320] Avg episode reward: [(0, '4.246')]
195
+ [2023-04-27 22:28:48,821][26612] Saving new best policy, reward=4.246!
196
+ [2023-04-27 22:28:50,306][26627] Updated weights for policy 0, policy_version 40 (0.0008)
197
+ [2023-04-27 22:28:50,340][19320] Heartbeat connected on Batcher_0
198
+ [2023-04-27 22:28:50,342][19320] Heartbeat connected on LearnerWorker_p0
199
+ [2023-04-27 22:28:50,349][19320] Heartbeat connected on RolloutWorker_w0
200
+ [2023-04-27 22:28:50,350][19320] Heartbeat connected on InferenceWorker_p0-w0
201
+ [2023-04-27 22:28:50,353][19320] Heartbeat connected on RolloutWorker_w2
202
+ [2023-04-27 22:28:50,356][19320] Heartbeat connected on RolloutWorker_w3
203
+ [2023-04-27 22:28:50,358][19320] Heartbeat connected on RolloutWorker_w4
204
+ [2023-04-27 22:28:50,361][19320] Heartbeat connected on RolloutWorker_w5
205
+ [2023-04-27 22:28:50,365][19320] Heartbeat connected on RolloutWorker_w6
206
+ [2023-04-27 22:28:50,366][19320] Heartbeat connected on RolloutWorker_w7
207
+ [2023-04-27 22:28:52,258][26627] Updated weights for policy 0, policy_version 50 (0.0007)
208
+ [2023-04-27 22:28:53,815][19320] Fps is (10 sec: 18432.0, 60 sec: 11468.8, 300 sec: 11468.8). Total num frames: 229376. Throughput: 0: 2718.3. Samples: 54366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
209
+ [2023-04-27 22:28:53,816][19320] Avg episode reward: [(0, '4.546')]
210
+ [2023-04-27 22:28:53,817][26612] Saving new best policy, reward=4.546!
211
+ [2023-04-27 22:28:54,878][26627] Updated weights for policy 0, policy_version 60 (0.0013)
212
+ [2023-04-27 22:28:57,353][26627] Updated weights for policy 0, policy_version 70 (0.0011)
213
+ [2023-04-27 22:28:58,815][19320] Fps is (10 sec: 17612.6, 60 sec: 12615.6, 300 sec: 12615.6). Total num frames: 315392. Throughput: 0: 2677.7. Samples: 66944. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
214
+ [2023-04-27 22:28:58,816][19320] Avg episode reward: [(0, '4.345')]
215
+ [2023-04-27 22:28:59,354][26627] Updated weights for policy 0, policy_version 80 (0.0009)
216
+ [2023-04-27 22:29:02,010][26627] Updated weights for policy 0, policy_version 90 (0.0014)
217
+ [2023-04-27 22:29:03,815][19320] Fps is (10 sec: 15974.3, 60 sec: 12970.7, 300 sec: 12970.7). Total num frames: 389120. Throughput: 0: 3074.0. Samples: 92220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
218
+ [2023-04-27 22:29:03,815][19320] Avg episode reward: [(0, '4.536')]
219
+ [2023-04-27 22:29:04,867][26627] Updated weights for policy 0, policy_version 100 (0.0020)
220
+ [2023-04-27 22:29:07,514][26627] Updated weights for policy 0, policy_version 110 (0.0011)
221
+ [2023-04-27 22:29:08,815][19320] Fps is (10 sec: 15974.6, 60 sec: 13575.3, 300 sec: 13575.3). Total num frames: 475136. Throughput: 0: 3287.9. Samples: 115078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
222
+ [2023-04-27 22:29:08,816][19320] Avg episode reward: [(0, '4.565')]
223
+ [2023-04-27 22:29:08,829][26612] Saving new best policy, reward=4.565!
224
+ [2023-04-27 22:29:09,550][26627] Updated weights for policy 0, policy_version 120 (0.0007)
225
+ [2023-04-27 22:29:11,733][26627] Updated weights for policy 0, policy_version 130 (0.0009)
226
+ [2023-04-27 22:29:13,474][26627] Updated weights for policy 0, policy_version 140 (0.0009)
227
+ [2023-04-27 22:29:13,815][19320] Fps is (10 sec: 19251.2, 60 sec: 14540.8, 300 sec: 14540.8). Total num frames: 581632. Throughput: 0: 3246.8. Samples: 129872. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
228
+ [2023-04-27 22:29:13,816][19320] Avg episode reward: [(0, '4.521')]
229
+ [2023-04-27 22:29:15,075][26627] Updated weights for policy 0, policy_version 150 (0.0006)
230
+ [2023-04-27 22:29:16,757][26627] Updated weights for policy 0, policy_version 160 (0.0008)
231
+ [2023-04-27 22:29:18,355][26627] Updated weights for policy 0, policy_version 170 (0.0008)
232
+ [2023-04-27 22:29:18,815][19320] Fps is (10 sec: 22937.4, 60 sec: 15655.8, 300 sec: 15655.8). Total num frames: 704512. Throughput: 0: 3678.4. Samples: 165526. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
233
+ [2023-04-27 22:29:18,816][19320] Avg episode reward: [(0, '4.458')]
234
+ [2023-04-27 22:29:20,047][26627] Updated weights for policy 0, policy_version 180 (0.0008)
235
+ [2023-04-27 22:29:21,736][26627] Updated weights for policy 0, policy_version 190 (0.0009)
236
+ [2023-04-27 22:29:23,376][26627] Updated weights for policy 0, policy_version 200 (0.0009)
237
+ [2023-04-27 22:29:23,815][19320] Fps is (10 sec: 24576.1, 60 sec: 16547.9, 300 sec: 16547.9). Total num frames: 827392. Throughput: 0: 4482.5. Samples: 202364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
238
+ [2023-04-27 22:29:23,815][19320] Avg episode reward: [(0, '4.534')]
239
+ [2023-04-27 22:29:25,033][26627] Updated weights for policy 0, policy_version 210 (0.0008)
240
+ [2023-04-27 22:29:26,702][26627] Updated weights for policy 0, policy_version 220 (0.0007)
241
+ [2023-04-27 22:29:28,383][26627] Updated weights for policy 0, policy_version 230 (0.0007)
242
+ [2023-04-27 22:29:28,815][19320] Fps is (10 sec: 24576.2, 60 sec: 17277.7, 300 sec: 17277.7). Total num frames: 950272. Throughput: 0: 4842.8. Samples: 221000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
243
+ [2023-04-27 22:29:28,816][19320] Avg episode reward: [(0, '4.485')]
244
+ [2023-04-27 22:29:30,029][26627] Updated weights for policy 0, policy_version 240 (0.0008)
245
+ [2023-04-27 22:29:31,795][26627] Updated weights for policy 0, policy_version 250 (0.0008)
246
+ [2023-04-27 22:29:33,815][19320] Fps is (10 sec: 23346.9, 60 sec: 17681.0, 300 sec: 17681.0). Total num frames: 1060864. Throughput: 0: 5146.0. Samples: 257374. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
247
+ [2023-04-27 22:29:33,816][19320] Avg episode reward: [(0, '4.669')]
248
+ [2023-04-27 22:29:33,818][26612] Saving new best policy, reward=4.669!
249
+ [2023-04-27 22:29:34,002][26627] Updated weights for policy 0, policy_version 260 (0.0008)
250
+ [2023-04-27 22:29:36,837][26627] Updated weights for policy 0, policy_version 270 (0.0010)
251
+ [2023-04-27 22:29:38,730][26627] Updated weights for policy 0, policy_version 280 (0.0007)
252
+ [2023-04-27 22:29:38,815][19320] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 17644.3). Total num frames: 1146880. Throughput: 0: 5076.0. Samples: 282788. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
253
+ [2023-04-27 22:29:38,816][19320] Avg episode reward: [(0, '4.759')]
254
+ [2023-04-27 22:29:38,821][26612] Saving new best policy, reward=4.759!
255
+ [2023-04-27 22:29:41,483][26627] Updated weights for policy 0, policy_version 290 (0.0013)
256
+ [2023-04-27 22:29:43,815][19320] Fps is (10 sec: 16384.2, 60 sec: 19660.8, 300 sec: 17495.8). Total num frames: 1224704. Throughput: 0: 5061.7. Samples: 294720. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
257
+ [2023-04-27 22:29:43,817][19320] Avg episode reward: [(0, '4.496')]
258
+ [2023-04-27 22:29:43,996][26627] Updated weights for policy 0, policy_version 300 (0.0009)
259
+ [2023-04-27 22:29:45,921][26627] Updated weights for policy 0, policy_version 310 (0.0009)
260
+ [2023-04-27 22:29:47,745][26627] Updated weights for policy 0, policy_version 320 (0.0007)
261
+ [2023-04-27 22:29:48,815][19320] Fps is (10 sec: 17203.2, 60 sec: 19660.8, 300 sec: 17585.5). Total num frames: 1318912. Throughput: 0: 5135.5. Samples: 323316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
262
+ [2023-04-27 22:29:48,816][19320] Avg episode reward: [(0, '4.280')]
263
+ [2023-04-27 22:29:50,625][26627] Updated weights for policy 0, policy_version 330 (0.0007)
264
+ [2023-04-27 22:29:52,896][26627] Updated weights for policy 0, policy_version 340 (0.0009)
265
+ [2023-04-27 22:29:53,815][19320] Fps is (10 sec: 17612.0, 60 sec: 19524.1, 300 sec: 17510.3). Total num frames: 1400832. Throughput: 0: 5191.6. Samples: 348700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
266
+ [2023-04-27 22:29:53,816][19320] Avg episode reward: [(0, '4.253')]
267
+ [2023-04-27 22:29:56,313][26627] Updated weights for policy 0, policy_version 350 (0.0015)
268
+ [2023-04-27 22:29:58,498][26627] Updated weights for policy 0, policy_version 360 (0.0007)
269
+ [2023-04-27 22:29:58,815][19320] Fps is (10 sec: 15974.4, 60 sec: 19387.8, 300 sec: 17396.0). Total num frames: 1478656. Throughput: 0: 5059.1. Samples: 357532. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
270
+ [2023-04-27 22:29:58,816][19320] Avg episode reward: [(0, '4.385')]
271
+ [2023-04-27 22:30:01,271][26627] Updated weights for policy 0, policy_version 370 (0.0012)
272
+ [2023-04-27 22:30:03,815][19320] Fps is (10 sec: 14746.0, 60 sec: 19319.4, 300 sec: 17203.2). Total num frames: 1548288. Throughput: 0: 4768.2. Samples: 380094. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
273
+ [2023-04-27 22:30:03,816][19320] Avg episode reward: [(0, '4.464')]
274
+ [2023-04-27 22:30:04,133][26627] Updated weights for policy 0, policy_version 380 (0.0012)
275
+ [2023-04-27 22:30:06,244][26627] Updated weights for policy 0, policy_version 390 (0.0007)
276
+ [2023-04-27 22:30:07,903][26627] Updated weights for policy 0, policy_version 400 (0.0008)
277
+ [2023-04-27 22:30:08,815][19320] Fps is (10 sec: 18022.4, 60 sec: 19729.1, 300 sec: 17461.9). Total num frames: 1658880. Throughput: 0: 4616.3. Samples: 410098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
278
+ [2023-04-27 22:30:08,816][19320] Avg episode reward: [(0, '4.384')]
279
+ [2023-04-27 22:30:09,574][26627] Updated weights for policy 0, policy_version 410 (0.0009)
280
+ [2023-04-27 22:30:11,225][26627] Updated weights for policy 0, policy_version 420 (0.0008)
281
+ [2023-04-27 22:30:12,922][26627] Updated weights for policy 0, policy_version 430 (0.0010)
282
+ [2023-04-27 22:30:13,815][19320] Fps is (10 sec: 23347.7, 60 sec: 20002.1, 300 sec: 17817.6). Total num frames: 1781760. Throughput: 0: 4617.6. Samples: 428792. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
283
+ [2023-04-27 22:30:13,816][19320] Avg episode reward: [(0, '4.328')]
284
+ [2023-04-27 22:30:14,554][26627] Updated weights for policy 0, policy_version 440 (0.0007)
285
+ [2023-04-27 22:30:16,206][26627] Updated weights for policy 0, policy_version 450 (0.0007)
286
+ [2023-04-27 22:30:17,871][26627] Updated weights for policy 0, policy_version 460 (0.0009)
287
+ [2023-04-27 22:30:18,815][19320] Fps is (10 sec: 24985.5, 60 sec: 20070.4, 300 sec: 18178.4). Total num frames: 1908736. Throughput: 0: 4631.5. Samples: 465790. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
288
+ [2023-04-27 22:30:18,816][19320] Avg episode reward: [(0, '4.316')]
289
+ [2023-04-27 22:30:19,460][26627] Updated weights for policy 0, policy_version 470 (0.0008)
290
+ [2023-04-27 22:30:21,046][26627] Updated weights for policy 0, policy_version 480 (0.0009)
291
+ [2023-04-27 22:30:22,710][26627] Updated weights for policy 0, policy_version 490 (0.0009)
292
+ [2023-04-27 22:30:23,815][19320] Fps is (10 sec: 24985.5, 60 sec: 20070.4, 300 sec: 18469.2). Total num frames: 2031616. Throughput: 0: 4906.8. Samples: 503596. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
293
+ [2023-04-27 22:30:23,816][19320] Avg episode reward: [(0, '4.388')]
294
+ [2023-04-27 22:30:24,368][26627] Updated weights for policy 0, policy_version 500 (0.0007)
295
+ [2023-04-27 22:30:25,982][26627] Updated weights for policy 0, policy_version 510 (0.0009)
296
+ [2023-04-27 22:30:28,815][19320] Fps is (10 sec: 20889.7, 60 sec: 19456.0, 300 sec: 18414.2). Total num frames: 2117632. Throughput: 0: 5056.4. Samples: 522256. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
297
+ [2023-04-27 22:30:28,816][19320] Avg episode reward: [(0, '4.397')]
298
+ [2023-04-27 22:30:28,820][26612] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000517_2117632.pth...
299
+ [2023-04-27 22:30:29,361][26627] Updated weights for policy 0, policy_version 520 (0.0015)
300
+ [2023-04-27 22:30:31,575][26627] Updated weights for policy 0, policy_version 530 (0.0009)
301
+ [2023-04-27 22:30:33,673][26627] Updated weights for policy 0, policy_version 540 (0.0007)
302
+ [2023-04-27 22:30:33,815][19320] Fps is (10 sec: 18022.5, 60 sec: 19183.0, 300 sec: 18432.0). Total num frames: 2211840. Throughput: 0: 4930.8. Samples: 545200. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
303
+ [2023-04-27 22:30:33,816][19320] Avg episode reward: [(0, '4.420')]
304
+ [2023-04-27 22:30:35,975][26627] Updated weights for policy 0, policy_version 550 (0.0008)
305
+ [2023-04-27 22:30:38,045][26627] Updated weights for policy 0, policy_version 560 (0.0010)
306
+ [2023-04-27 22:30:38,815][19320] Fps is (10 sec: 18841.5, 60 sec: 19319.5, 300 sec: 18448.4). Total num frames: 2306048. Throughput: 0: 4988.7. Samples: 573190. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
307
+ [2023-04-27 22:30:38,816][19320] Avg episode reward: [(0, '4.549')]
308
+ [2023-04-27 22:30:40,672][26627] Updated weights for policy 0, policy_version 570 (0.0011)
309
+ [2023-04-27 22:30:43,150][26627] Updated weights for policy 0, policy_version 580 (0.0012)
310
+ [2023-04-27 22:30:43,815][19320] Fps is (10 sec: 17612.8, 60 sec: 19387.7, 300 sec: 18369.0). Total num frames: 2387968. Throughput: 0: 5056.8. Samples: 585088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
311
+ [2023-04-27 22:30:43,816][19320] Avg episode reward: [(0, '4.520')]
312
+ [2023-04-27 22:30:45,278][26627] Updated weights for policy 0, policy_version 590 (0.0007)
313
+ [2023-04-27 22:30:47,790][26627] Updated weights for policy 0, policy_version 600 (0.0007)
314
+ [2023-04-27 22:30:48,815][19320] Fps is (10 sec: 17203.2, 60 sec: 19319.5, 300 sec: 18356.1). Total num frames: 2478080. Throughput: 0: 5159.8. Samples: 612286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
315
+ [2023-04-27 22:30:48,816][19320] Avg episode reward: [(0, '4.326')]
316
+ [2023-04-27 22:30:50,346][26627] Updated weights for policy 0, policy_version 610 (0.0008)
317
+ [2023-04-27 22:30:53,815][19320] Fps is (10 sec: 14745.6, 60 sec: 18910.0, 300 sec: 18110.2). Total num frames: 2535424. Throughput: 0: 4977.5. Samples: 634084. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
318
+ [2023-04-27 22:30:53,816][19320] Avg episode reward: [(0, '4.424')]
319
+ [2023-04-27 22:30:53,893][26627] Updated weights for policy 0, policy_version 620 (0.0019)
320
+ [2023-04-27 22:30:56,047][26627] Updated weights for policy 0, policy_version 630 (0.0007)
321
+ [2023-04-27 22:30:58,480][26627] Updated weights for policy 0, policy_version 640 (0.0011)
322
+ [2023-04-27 22:30:58,815][19320] Fps is (10 sec: 14745.6, 60 sec: 19114.7, 300 sec: 18107.1). Total num frames: 2625536. Throughput: 0: 4805.1. Samples: 645020. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
323
+ [2023-04-27 22:30:58,815][19320] Avg episode reward: [(0, '4.450')]
324
+ [2023-04-27 22:31:00,719][26627] Updated weights for policy 0, policy_version 650 (0.0008)
325
+ [2023-04-27 22:31:02,385][26627] Updated weights for policy 0, policy_version 660 (0.0008)
326
+ [2023-04-27 22:31:03,815][19320] Fps is (10 sec: 20070.4, 60 sec: 19797.4, 300 sec: 18240.9). Total num frames: 2736128. Throughput: 0: 4617.8. Samples: 673592. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
327
+ [2023-04-27 22:31:03,816][19320] Avg episode reward: [(0, '4.602')]
328
+ [2023-04-27 22:31:04,041][26627] Updated weights for policy 0, policy_version 670 (0.0010)
329
+ [2023-04-27 22:31:05,642][26627] Updated weights for policy 0, policy_version 680 (0.0009)
330
+ [2023-04-27 22:31:07,304][26627] Updated weights for policy 0, policy_version 690 (0.0007)
331
+ [2023-04-27 22:31:08,815][19320] Fps is (10 sec: 23756.8, 60 sec: 20070.4, 300 sec: 18471.6). Total num frames: 2863104. Throughput: 0: 4611.2. Samples: 711102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
332
+ [2023-04-27 22:31:08,816][19320] Avg episode reward: [(0, '4.465')]
333
+ [2023-04-27 22:31:08,913][26627] Updated weights for policy 0, policy_version 700 (0.0008)
334
+ [2023-04-27 22:31:10,614][26627] Updated weights for policy 0, policy_version 710 (0.0006)
335
+ [2023-04-27 22:31:12,270][26627] Updated weights for policy 0, policy_version 720 (0.0007)
336
+ [2023-04-27 22:31:13,815][19320] Fps is (10 sec: 24985.6, 60 sec: 20070.4, 300 sec: 18662.4). Total num frames: 2985984. Throughput: 0: 4608.2. Samples: 729626. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
337
+ [2023-04-27 22:31:13,816][19320] Avg episode reward: [(0, '4.396')]
338
+ [2023-04-27 22:31:13,869][26627] Updated weights for policy 0, policy_version 730 (0.0007)
339
+ [2023-04-27 22:31:15,506][26627] Updated weights for policy 0, policy_version 740 (0.0009)
340
+ [2023-04-27 22:31:17,160][26627] Updated weights for policy 0, policy_version 750 (0.0008)
341
+ [2023-04-27 22:31:18,815][19320] Fps is (10 sec: 24576.0, 60 sec: 20002.1, 300 sec: 18841.6). Total num frames: 3108864. Throughput: 0: 4932.6. Samples: 767168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
342
+ [2023-04-27 22:31:18,816][19320] Avg episode reward: [(0, '4.364')]
343
+ [2023-04-27 22:31:18,848][26627] Updated weights for policy 0, policy_version 760 (0.0009)
344
+ [2023-04-27 22:31:21,623][26627] Updated weights for policy 0, policy_version 770 (0.0013)
345
+ [2023-04-27 22:31:23,815][19320] Fps is (10 sec: 20070.4, 60 sec: 19251.2, 300 sec: 18745.2). Total num frames: 3186688. Throughput: 0: 4907.6. Samples: 794032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
346
+ [2023-04-27 22:31:23,816][19320] Avg episode reward: [(0, '4.513')]
347
+ [2023-04-27 22:31:24,158][26627] Updated weights for policy 0, policy_version 780 (0.0013)
348
+ [2023-04-27 22:31:26,913][26627] Updated weights for policy 0, policy_version 790 (0.0012)
349
+ [2023-04-27 22:31:28,815][19320] Fps is (10 sec: 14336.0, 60 sec: 18909.9, 300 sec: 18584.1). Total num frames: 3252224. Throughput: 0: 4932.8. Samples: 807066. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
350
+ [2023-04-27 22:31:28,815][19320] Avg episode reward: [(0, '4.367')]
351
+ [2023-04-27 22:31:30,689][26627] Updated weights for policy 0, policy_version 800 (0.0012)
352
+ [2023-04-27 22:31:33,035][26627] Updated weights for policy 0, policy_version 810 (0.0011)
353
+ [2023-04-27 22:31:33,815][19320] Fps is (10 sec: 14745.6, 60 sec: 18705.1, 300 sec: 18523.0). Total num frames: 3334144. Throughput: 0: 4712.5. Samples: 824348. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
354
+ [2023-04-27 22:31:33,816][19320] Avg episode reward: [(0, '4.320')]
355
+ [2023-04-27 22:31:35,061][26627] Updated weights for policy 0, policy_version 820 (0.0007)
356
+ [2023-04-27 22:31:37,474][26627] Updated weights for policy 0, policy_version 830 (0.0010)
357
+ [2023-04-27 22:31:38,815][19320] Fps is (10 sec: 16793.5, 60 sec: 18568.5, 300 sec: 18487.4). Total num frames: 3420160. Throughput: 0: 4863.2. Samples: 852926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
358
+ [2023-04-27 22:31:38,816][19320] Avg episode reward: [(0, '4.490')]
359
+ [2023-04-27 22:31:40,044][26627] Updated weights for policy 0, policy_version 840 (0.0007)
360
+ [2023-04-27 22:31:42,664][26627] Updated weights for policy 0, policy_version 850 (0.0014)
361
+ [2023-04-27 22:31:43,815][19320] Fps is (10 sec: 15974.4, 60 sec: 18432.0, 300 sec: 18388.9). Total num frames: 3493888. Throughput: 0: 4869.0. Samples: 864126. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
362
+ [2023-04-27 22:31:43,816][19320] Avg episode reward: [(0, '4.454')]
363
+ [2023-04-27 22:31:45,339][26627] Updated weights for policy 0, policy_version 860 (0.0014)
364
+ [2023-04-27 22:31:47,414][26627] Updated weights for policy 0, policy_version 870 (0.0015)
365
+ [2023-04-27 22:31:48,815][19320] Fps is (10 sec: 16793.2, 60 sec: 18500.2, 300 sec: 18400.5). Total num frames: 3588096. Throughput: 0: 4780.7. Samples: 888726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
366
+ [2023-04-27 22:31:48,816][19320] Avg episode reward: [(0, '4.718')]
367
+ [2023-04-27 22:31:49,524][26627] Updated weights for policy 0, policy_version 880 (0.0008)
368
+ [2023-04-27 22:31:51,616][26627] Updated weights for policy 0, policy_version 890 (0.0012)
369
+ [2023-04-27 22:31:53,815][19320] Fps is (10 sec: 18841.4, 60 sec: 19114.6, 300 sec: 18411.5). Total num frames: 3682304. Throughput: 0: 4563.8. Samples: 916472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
370
+ [2023-04-27 22:31:53,816][19320] Avg episode reward: [(0, '4.287')]
371
+ [2023-04-27 22:31:53,847][26627] Updated weights for policy 0, policy_version 900 (0.0011)
372
+ [2023-04-27 22:31:55,513][26627] Updated weights for policy 0, policy_version 910 (0.0009)
373
+ [2023-04-27 22:31:57,111][26627] Updated weights for policy 0, policy_version 920 (0.0007)
374
+ [2023-04-27 22:31:58,754][26627] Updated weights for policy 0, policy_version 930 (0.0007)
375
+ [2023-04-27 22:31:58,815][19320] Fps is (10 sec: 22118.6, 60 sec: 19729.0, 300 sec: 18581.8). Total num frames: 3809280. Throughput: 0: 4564.9. Samples: 935046. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
376
+ [2023-04-27 22:31:58,816][19320] Avg episode reward: [(0, '4.318')]
377
+ [2023-04-27 22:32:00,439][26627] Updated weights for policy 0, policy_version 940 (0.0007)
378
+ [2023-04-27 22:32:02,146][26627] Updated weights for policy 0, policy_version 950 (0.0007)
379
+ [2023-04-27 22:32:03,815][19320] Fps is (10 sec: 24576.2, 60 sec: 19865.6, 300 sec: 18705.1). Total num frames: 3928064. Throughput: 0: 4548.2. Samples: 971838. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
380
+ [2023-04-27 22:32:03,816][19320] Avg episode reward: [(0, '4.347')]
381
+ [2023-04-27 22:32:03,837][26627] Updated weights for policy 0, policy_version 960 (0.0008)
382
+ [2023-04-27 22:32:05,497][26627] Updated weights for policy 0, policy_version 970 (0.0008)
383
+ [2023-04-27 22:32:06,763][26612] Stopping Batcher_0...
384
+ [2023-04-27 22:32:06,763][26612] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
385
+ [2023-04-27 22:32:06,763][26612] Loop batcher_evt_loop terminating...
386
+ [2023-04-27 22:32:06,763][19320] Component Batcher_0 stopped!
387
+ [2023-04-27 22:32:06,764][19320] Component RolloutWorker_w1 process died already! Don't wait for it.
388
+ [2023-04-27 22:32:06,768][26638] Stopping RolloutWorker_w7...
389
+ [2023-04-27 22:32:06,769][26638] Loop rollout_proc7_evt_loop terminating...
390
+ [2023-04-27 22:32:06,769][26630] Stopping RolloutWorker_w3...
391
+ [2023-04-27 22:32:06,769][26631] Stopping RolloutWorker_w4...
392
+ [2023-04-27 22:32:06,769][26630] Loop rollout_proc3_evt_loop terminating...
393
+ [2023-04-27 22:32:06,769][26631] Loop rollout_proc4_evt_loop terminating...
394
+ [2023-04-27 22:32:06,769][26629] Stopping RolloutWorker_w2...
395
+ [2023-04-27 22:32:06,768][19320] Component RolloutWorker_w7 stopped!
396
+ [2023-04-27 22:32:06,769][26629] Loop rollout_proc2_evt_loop terminating...
397
+ [2023-04-27 22:32:06,770][19320] Component RolloutWorker_w3 stopped!
398
+ [2023-04-27 22:32:06,770][26632] Stopping RolloutWorker_w5...
399
+ [2023-04-27 22:32:06,771][26632] Loop rollout_proc5_evt_loop terminating...
400
+ [2023-04-27 22:32:06,771][19320] Component RolloutWorker_w4 stopped!
401
+ [2023-04-27 22:32:06,772][26657] Stopping RolloutWorker_w6...
402
+ [2023-04-27 22:32:06,773][26657] Loop rollout_proc6_evt_loop terminating...
403
+ [2023-04-27 22:32:06,773][19320] Component RolloutWorker_w2 stopped!
404
+ [2023-04-27 22:32:06,774][19320] Component RolloutWorker_w5 stopped!
405
+ [2023-04-27 22:32:06,775][19320] Component RolloutWorker_w6 stopped!
406
+ [2023-04-27 22:32:06,775][26627] Weights refcount: 2 0
407
+ [2023-04-27 22:32:06,777][26627] Stopping InferenceWorker_p0-w0...
408
+ [2023-04-27 22:32:06,777][26627] Loop inference_proc0-0_evt_loop terminating...
409
+ [2023-04-27 22:32:06,777][19320] Component InferenceWorker_p0-w0 stopped!
410
+ [2023-04-27 22:32:06,786][26626] Stopping RolloutWorker_w0...
411
+ [2023-04-27 22:32:06,787][26626] Loop rollout_proc0_evt_loop terminating...
412
+ [2023-04-27 22:32:06,786][19320] Component RolloutWorker_w0 stopped!
413
+ [2023-04-27 22:32:06,811][26612] Saving /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
414
+ [2023-04-27 22:32:06,858][26612] Stopping LearnerWorker_p0...
415
+ [2023-04-27 22:32:06,859][26612] Loop learner_proc0_evt_loop terminating...
416
+ [2023-04-27 22:32:06,859][19320] Component LearnerWorker_p0 stopped!
417
+ [2023-04-27 22:32:06,860][19320] Waiting for process learner_proc0 to stop...
418
+ [2023-04-27 22:32:07,560][19320] Waiting for process inference_proc0-0 to join...
419
+ [2023-04-27 22:32:07,561][19320] Waiting for process rollout_proc0 to join...
420
+ [2023-04-27 22:32:07,562][19320] Waiting for process rollout_proc1 to join...
421
+ [2023-04-27 22:32:07,563][19320] Waiting for process rollout_proc2 to join...
422
+ [2023-04-27 22:32:07,564][19320] Waiting for process rollout_proc3 to join...
423
+ [2023-04-27 22:32:07,564][19320] Waiting for process rollout_proc4 to join...
424
+ [2023-04-27 22:32:07,565][19320] Waiting for process rollout_proc5 to join...
425
+ [2023-04-27 22:32:07,566][19320] Waiting for process rollout_proc6 to join...
426
+ [2023-04-27 22:32:07,567][19320] Waiting for process rollout_proc7 to join...
427
+ [2023-04-27 22:32:07,567][19320] Batcher 0 profile tree view:
428
+ batching: 12.7812, releasing_batches: 0.0195
429
+ [2023-04-27 22:32:07,568][19320] InferenceWorker_p0-w0 profile tree view:
430
+ wait_policy: 0.0000
431
+ wait_policy_total: 2.8370
432
+ update_model: 2.8123
433
+ weight_update: 0.0007
434
+ one_step: 0.0023
435
+ handle_policy_step: 195.8112
436
+ deserialize: 5.6395, stack: 0.7269, obs_to_device_normalize: 44.5135, forward: 68.6296, send_messages: 17.9614
437
+ prepare_outputs: 52.0115
438
+ to_cpu: 45.1667
439
+ [2023-04-27 22:32:07,569][19320] Learner 0 profile tree view:
440
+ misc: 0.0037, prepare_batch: 9.4485
441
+ train: 22.5017
442
+ epoch_init: 0.0033, minibatch_init: 0.0045, losses_postprocess: 0.4800, kl_divergence: 0.4893, after_optimizer: 6.8805
443
+ calculate_losses: 7.1964
444
+ losses_init: 0.0020, forward_head: 0.5407, bptt_initial: 3.3368, tail: 0.4113, advantages_returns: 0.1193, losses: 1.7274
445
+ bptt: 0.9502
446
+ bptt_forward_core: 0.9150
447
+ update: 7.1922
448
+ clip: 0.8865
449
+ [2023-04-27 22:32:07,569][19320] RolloutWorker_w0 profile tree view:
450
+ wait_for_trajectories: 0.0991, enqueue_policy_requests: 5.1038, env_step: 94.9605, overhead: 6.8366, complete_rollouts: 0.2024
451
+ save_policy_outputs: 6.2491
452
+ split_output_tensors: 2.9626
453
+ [2023-04-27 22:32:07,570][19320] RolloutWorker_w7 profile tree view:
454
+ wait_for_trajectories: 0.0969, enqueue_policy_requests: 5.1019, env_step: 95.1988, overhead: 6.8454, complete_rollouts: 0.2055
455
+ save_policy_outputs: 6.4640
456
+ split_output_tensors: 3.0653
457
+ [2023-04-27 22:32:07,571][19320] Loop Runner_EvtLoop terminating...
458
+ [2023-04-27 22:32:07,572][19320] Runner profile tree view:
459
+ main_loop: 217.2086
460
+ [2023-04-27 22:32:07,573][19320] Collected {0: 4005888}, FPS: 18442.6
461
+ [2023-04-27 22:33:30,876][19320] Loading existing experiment configuration from /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json
462
+ [2023-04-27 22:33:30,877][19320] Overriding arg 'num_workers' with value 1 passed from command line
463
+ [2023-04-27 22:33:30,878][19320] Adding new argument 'no_render'=True that is not in the saved config file!
464
+ [2023-04-27 22:33:30,878][19320] Adding new argument 'save_video'=True that is not in the saved config file!
465
+ [2023-04-27 22:33:30,879][19320] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
466
+ [2023-04-27 22:33:30,879][19320] Adding new argument 'video_name'=None that is not in the saved config file!
467
+ [2023-04-27 22:33:30,880][19320] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
468
+ [2023-04-27 22:33:30,881][19320] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
469
+ [2023-04-27 22:33:30,882][19320] Adding new argument 'push_to_hub'=False that is not in the saved config file!
470
+ [2023-04-27 22:33:30,882][19320] Adding new argument 'hf_repository'=None that is not in the saved config file!
471
+ [2023-04-27 22:33:30,883][19320] Adding new argument 'policy_index'=0 that is not in the saved config file!
472
+ [2023-04-27 22:33:30,883][19320] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
473
+ [2023-04-27 22:33:30,884][19320] Adding new argument 'train_script'=None that is not in the saved config file!
474
+ [2023-04-27 22:33:30,885][19320] Adding new argument 'enjoy_script'=None that is not in the saved config file!
475
+ [2023-04-27 22:33:30,885][19320] Using frameskip 1 and render_action_repeat=4 for evaluation
476
+ [2023-04-27 22:33:30,891][19320] Doom resolution: 160x120, resize resolution: (128, 72)
477
+ [2023-04-27 22:33:30,892][19320] RunningMeanStd input shape: (3, 72, 128)
478
+ [2023-04-27 22:33:30,893][19320] RunningMeanStd input shape: (1,)
479
+ [2023-04-27 22:33:30,904][19320] ConvEncoder: input_channels=3
480
+ [2023-04-27 22:33:30,994][19320] Conv encoder output size: 512
481
+ [2023-04-27 22:33:30,995][19320] Policy head output size: 512
482
+ [2023-04-27 22:33:32,427][19320] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
483
+ [2023-04-27 22:33:33,708][19320] Num frames 100...
484
+ [2023-04-27 22:33:33,796][19320] Num frames 200...
485
+ [2023-04-27 22:33:33,876][19320] Num frames 300...
486
+ [2023-04-27 22:33:33,996][19320] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
487
+ [2023-04-27 22:33:33,997][19320] Avg episode reward: 3.840, avg true_objective: 3.840
488
+ [2023-04-27 22:33:34,014][19320] Num frames 400...
489
+ [2023-04-27 22:33:34,096][19320] Num frames 500...
490
+ [2023-04-27 22:33:34,173][19320] Num frames 600...
491
+ [2023-04-27 22:33:34,251][19320] Num frames 700...
492
+ [2023-04-27 22:33:34,358][19320] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
493
+ [2023-04-27 22:33:34,359][19320] Avg episode reward: 3.840, avg true_objective: 3.840
494
+ [2023-04-27 22:33:34,389][19320] Num frames 800...
495
+ [2023-04-27 22:33:34,468][19320] Num frames 900...
496
+ [2023-04-27 22:33:34,542][19320] Num frames 1000...
497
+ [2023-04-27 22:33:34,617][19320] Num frames 1100...
498
+ [2023-04-27 22:33:34,736][19320] Avg episode rewards: #0: 4.280, true rewards: #0: 3.947
499
+ [2023-04-27 22:33:34,737][19320] Avg episode reward: 4.280, avg true_objective: 3.947
500
+ [2023-04-27 22:33:34,752][19320] Num frames 1200...
501
+ [2023-04-27 22:33:34,835][19320] Num frames 1300...
502
+ [2023-04-27 22:33:34,915][19320] Num frames 1400...
503
+ [2023-04-27 22:33:34,997][19320] Num frames 1500...
504
+ [2023-04-27 22:33:35,092][19320] Num frames 1600...
505
+ [2023-04-27 22:33:35,144][19320] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
506
+ [2023-04-27 22:33:35,145][19320] Avg episode reward: 4.500, avg true_objective: 4.000
507
+ [2023-04-27 22:33:35,229][19320] Num frames 1700...
508
+ [2023-04-27 22:33:35,311][19320] Num frames 1800...
509
+ [2023-04-27 22:33:35,391][19320] Num frames 1900...
510
+ [2023-04-27 22:33:35,474][19320] Num frames 2000...
511
+ [2023-04-27 22:33:35,569][19320] Avg episode rewards: #0: 4.696, true rewards: #0: 4.096
512
+ [2023-04-27 22:33:35,570][19320] Avg episode reward: 4.696, avg true_objective: 4.096
513
+ [2023-04-27 22:33:35,617][19320] Num frames 2100...
514
+ [2023-04-27 22:33:35,698][19320] Num frames 2200...
515
+ [2023-04-27 22:33:35,772][19320] Num frames 2300...
516
+ [2023-04-27 22:33:35,849][19320] Num frames 2400...
517
+ [2023-04-27 22:33:35,932][19320] Avg episode rewards: #0: 4.553, true rewards: #0: 4.053
518
+ [2023-04-27 22:33:35,933][19320] Avg episode reward: 4.553, avg true_objective: 4.053
519
+ [2023-04-27 22:33:35,989][19320] Num frames 2500...
520
+ [2023-04-27 22:33:36,070][19320] Num frames 2600...
521
+ [2023-04-27 22:33:36,157][19320] Num frames 2700...
522
+ [2023-04-27 22:33:36,236][19320] Num frames 2800...
523
+ [2023-04-27 22:33:36,319][19320] Num frames 2900...
524
+ [2023-04-27 22:33:36,396][19320] Num frames 3000...
525
+ [2023-04-27 22:33:36,479][19320] Avg episode rewards: #0: 5.200, true rewards: #0: 4.343
526
+ [2023-04-27 22:33:36,480][19320] Avg episode reward: 5.200, avg true_objective: 4.343
527
+ [2023-04-27 22:33:36,531][19320] Num frames 3100...
528
+ [2023-04-27 22:33:36,612][19320] Num frames 3200...
529
+ [2023-04-27 22:33:36,740][19320] Avg episode rewards: #0: 4.870, true rewards: #0: 4.120
530
+ [2023-04-27 22:33:36,741][19320] Avg episode reward: 4.870, avg true_objective: 4.120
531
+ [2023-04-27 22:33:36,747][19320] Num frames 3300...
532
+ [2023-04-27 22:33:36,839][19320] Num frames 3400...
533
+ [2023-04-27 22:33:36,925][19320] Num frames 3500...
534
+ [2023-04-27 22:33:36,998][19320] Num frames 3600...
535
+ [2023-04-27 22:33:37,114][19320] Avg episode rewards: #0: 4.756, true rewards: #0: 4.089
536
+ [2023-04-27 22:33:37,115][19320] Avg episode reward: 4.756, avg true_objective: 4.089
537
+ [2023-04-27 22:33:37,136][19320] Num frames 3700...
538
+ [2023-04-27 22:33:37,237][19320] Num frames 3800...
539
+ [2023-04-27 22:33:37,323][19320] Num frames 3900...
540
+ [2023-04-27 22:33:37,407][19320] Num frames 4000...
541
+ [2023-04-27 22:33:37,512][19320] Avg episode rewards: #0: 4.664, true rewards: #0: 4.064
542
+ [2023-04-27 22:33:37,513][19320] Avg episode reward: 4.664, avg true_objective: 4.064
543
+ [2023-04-27 22:33:41,947][19320] Replay video saved to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/replay.mp4!
544
+ [2023-04-27 22:36:21,719][19320] Loading existing experiment configuration from /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/config.json
545
+ [2023-04-27 22:36:21,720][19320] Overriding arg 'num_workers' with value 1 passed from command line
546
+ [2023-04-27 22:36:21,721][19320] Adding new argument 'no_render'=True that is not in the saved config file!
547
+ [2023-04-27 22:36:21,722][19320] Adding new argument 'save_video'=True that is not in the saved config file!
548
+ [2023-04-27 22:36:21,722][19320] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
549
+ [2023-04-27 22:36:21,723][19320] Adding new argument 'video_name'=None that is not in the saved config file!
550
+ [2023-04-27 22:36:21,724][19320] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
551
+ [2023-04-27 22:36:21,724][19320] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
552
+ [2023-04-27 22:36:21,725][19320] Adding new argument 'push_to_hub'=True that is not in the saved config file!
553
+ [2023-04-27 22:36:21,726][19320] Adding new argument 'hf_repository'='ItchyB/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
554
+ [2023-04-27 22:36:21,726][19320] Adding new argument 'policy_index'=0 that is not in the saved config file!
555
+ [2023-04-27 22:36:21,726][19320] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
556
+ [2023-04-27 22:36:21,727][19320] Adding new argument 'train_script'=None that is not in the saved config file!
557
+ [2023-04-27 22:36:21,728][19320] Adding new argument 'enjoy_script'=None that is not in the saved config file!
558
+ [2023-04-27 22:36:21,729][19320] Using frameskip 1 and render_action_repeat=4 for evaluation
559
+ [2023-04-27 22:36:21,732][19320] RunningMeanStd input shape: (3, 72, 128)
560
+ [2023-04-27 22:36:21,733][19320] RunningMeanStd input shape: (1,)
561
+ [2023-04-27 22:36:21,740][19320] ConvEncoder: input_channels=3
562
+ [2023-04-27 22:36:21,763][19320] Conv encoder output size: 512
563
+ [2023-04-27 22:36:21,764][19320] Policy head output size: 512
564
+ [2023-04-27 22:36:21,791][19320] Loading state from checkpoint /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
565
+ [2023-04-27 22:36:22,219][19320] Num frames 100...
566
+ [2023-04-27 22:36:22,339][19320] Num frames 200...
567
+ [2023-04-27 22:36:22,445][19320] Num frames 300...
568
+ [2023-04-27 22:36:22,572][19320] Num frames 400...
569
+ [2023-04-27 22:36:22,684][19320] Num frames 500...
570
+ [2023-04-27 22:36:22,754][19320] Avg episode rewards: #0: 7.120, true rewards: #0: 5.120
571
+ [2023-04-27 22:36:22,755][19320] Avg episode reward: 7.120, avg true_objective: 5.120
572
+ [2023-04-27 22:36:22,855][19320] Num frames 600...
573
+ [2023-04-27 22:36:22,974][19320] Num frames 700...
574
+ [2023-04-27 22:36:23,101][19320] Num frames 800...
575
+ [2023-04-27 22:36:23,257][19320] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
576
+ [2023-04-27 22:36:23,258][19320] Avg episode reward: 5.480, avg true_objective: 4.480
577
+ [2023-04-27 22:36:23,263][19320] Num frames 900...
578
+ [2023-04-27 22:36:23,371][19320] Num frames 1000...
579
+ [2023-04-27 22:36:23,473][19320] Num frames 1100...
580
+ [2023-04-27 22:36:23,587][19320] Num frames 1200...
581
+ [2023-04-27 22:36:23,727][19320] Avg episode rewards: #0: 4.933, true rewards: #0: 4.267
582
+ [2023-04-27 22:36:23,728][19320] Avg episode reward: 4.933, avg true_objective: 4.267
583
+ [2023-04-27 22:36:23,755][19320] Num frames 1300...
584
+ [2023-04-27 22:36:23,873][19320] Num frames 1400...
585
+ [2023-04-27 22:36:23,991][19320] Num frames 1500...
586
+ [2023-04-27 22:36:24,080][19320] Avg episode rewards: #0: 4.340, true rewards: #0: 3.840
587
+ [2023-04-27 22:36:24,081][19320] Avg episode reward: 4.340, avg true_objective: 3.840
588
+ [2023-04-27 22:36:24,143][19320] Num frames 1600...
589
+ [2023-04-27 22:36:24,240][19320] Num frames 1700...
590
+ [2023-04-27 22:36:24,341][19320] Num frames 1800...
591
+ [2023-04-27 22:36:24,432][19320] Num frames 1900...
592
+ [2023-04-27 22:36:24,562][19320] Avg episode rewards: #0: 4.768, true rewards: #0: 3.968
593
+ [2023-04-27 22:36:24,563][19320] Avg episode reward: 4.768, avg true_objective: 3.968
594
+ [2023-04-27 22:36:24,577][19320] Num frames 2000...
595
+ [2023-04-27 22:36:24,676][19320] Num frames 2100...
596
+ [2023-04-27 22:36:24,778][19320] Num frames 2200...
597
+ [2023-04-27 22:36:24,881][19320] Num frames 2300...
598
+ [2023-04-27 22:36:25,004][19320] Avg episode rewards: #0: 4.613, true rewards: #0: 3.947
599
+ [2023-04-27 22:36:25,005][19320] Avg episode reward: 4.613, avg true_objective: 3.947
600
+ [2023-04-27 22:36:25,036][19320] Num frames 2400...
601
+ [2023-04-27 22:36:25,138][19320] Num frames 2500...
602
+ [2023-04-27 22:36:25,237][19320] Num frames 2600...
603
+ [2023-04-27 22:36:25,340][19320] Num frames 2700...
604
+ [2023-04-27 22:36:25,446][19320] Avg episode rewards: #0: 4.503, true rewards: #0: 3.931
605
+ [2023-04-27 22:36:25,447][19320] Avg episode reward: 4.503, avg true_objective: 3.931
606
+ [2023-04-27 22:36:25,498][19320] Num frames 2800...
607
+ [2023-04-27 22:36:25,598][19320] Num frames 2900...
608
+ [2023-04-27 22:36:25,701][19320] Num frames 3000...
609
+ [2023-04-27 22:36:25,800][19320] Num frames 3100...
610
+ [2023-04-27 22:36:25,888][19320] Avg episode rewards: #0: 4.420, true rewards: #0: 3.920
611
+ [2023-04-27 22:36:25,889][19320] Avg episode reward: 4.420, avg true_objective: 3.920
612
+ [2023-04-27 22:36:25,969][19320] Num frames 3200...
613
+ [2023-04-27 22:36:26,076][19320] Num frames 3300...
614
+ [2023-04-27 22:36:26,181][19320] Num frames 3400...
615
+ [2023-04-27 22:36:26,281][19320] Num frames 3500...
616
+ [2023-04-27 22:36:26,354][19320] Avg episode rewards: #0: 4.356, true rewards: #0: 3.911
617
+ [2023-04-27 22:36:26,355][19320] Avg episode reward: 4.356, avg true_objective: 3.911
618
+ [2023-04-27 22:36:26,446][19320] Num frames 3600...
619
+ [2023-04-27 22:36:26,560][19320] Num frames 3700...
620
+ [2023-04-27 22:36:26,670][19320] Num frames 3800...
621
+ [2023-04-27 22:36:26,784][19320] Num frames 3900...
622
+ [2023-04-27 22:36:26,842][19320] Avg episode rewards: #0: 4.304, true rewards: #0: 3.904
623
+ [2023-04-27 22:36:26,842][19320] Avg episode reward: 4.304, avg true_objective: 3.904
624
+ [2023-04-27 22:36:30,880][19320] Replay video saved to /home/byron/projects/rl-learning-course/unit-08/train_dir/default_experiment/replay.mp4!