diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -2996,3 +2996,1474 @@ main_loop: 1204.4236 [2023-02-27 11:47:37,612][00394] Avg episode rewards: #0: 4.014, true rewards: #0: 3.714 [2023-02-27 11:47:37,613][00394] Avg episode reward: 4.014, avg true_objective: 3.714 [2023-02-27 11:47:58,118][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-27 11:48:01,600][00394] The model has been pushed to https://huggingface.co/Clawoo/rl_course_vizdoom_health_gathering_supreme +[2023-02-27 11:49:57,137][00394] Environment doom_basic already registered, overwriting... +[2023-02-27 11:49:57,140][00394] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-27 11:49:57,142][00394] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-27 11:49:57,145][00394] Environment doom_dm already registered, overwriting... +[2023-02-27 11:49:57,146][00394] Environment doom_dwango5 already registered, overwriting... +[2023-02-27 11:49:57,150][00394] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-27 11:49:57,151][00394] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-27 11:49:57,152][00394] Environment doom_my_way_home already registered, overwriting... +[2023-02-27 11:49:57,154][00394] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-27 11:49:57,155][00394] Environment doom_defend_the_center already registered, overwriting... +[2023-02-27 11:49:57,157][00394] Environment doom_defend_the_line already registered, overwriting... +[2023-02-27 11:49:57,159][00394] Environment doom_health_gathering already registered, overwriting... +[2023-02-27 11:49:57,160][00394] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-27 11:49:57,162][00394] Environment doom_battle already registered, overwriting... +[2023-02-27 11:49:57,163][00394] Environment doom_battle2 already registered, overwriting... +[2023-02-27 11:49:57,165][00394] Environment doom_duel_bots already registered, overwriting... +[2023-02-27 11:49:57,166][00394] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-27 11:49:57,168][00394] Environment doom_duel already registered, overwriting... +[2023-02-27 11:49:57,170][00394] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-27 11:49:57,171][00394] Environment doom_benchmark already registered, overwriting... +[2023-02-27 11:49:57,172][00394] register_encoder_factory: +[2023-02-27 11:49:57,208][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 11:49:57,210][00394] Overriding arg 'train_for_env_steps' with value 12000000 passed from command line +[2023-02-27 11:49:57,217][00394] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-27 11:49:57,218][00394] Resuming existing experiment from /content/train_dir/default_experiment... +[2023-02-27 11:49:57,221][00394] Weights and Biases integration disabled +[2023-02-27 11:49:57,229][00394] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2023-02-27 11:50:00,950][00394] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=12000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2023-02-27 11:50:00,956][00394] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-27 11:50:00,961][00394] Rollout worker 0 uses device cpu +[2023-02-27 11:50:00,963][00394] Rollout worker 1 uses device cpu +[2023-02-27 11:50:00,966][00394] Rollout worker 2 uses device cpu +[2023-02-27 11:50:00,967][00394] Rollout worker 3 uses device cpu +[2023-02-27 11:50:00,968][00394] Rollout worker 4 uses device cpu +[2023-02-27 11:50:00,971][00394] Rollout worker 5 uses device cpu +[2023-02-27 11:50:00,973][00394] Rollout worker 6 uses device cpu +[2023-02-27 11:50:00,974][00394] Rollout worker 7 uses device cpu +[2023-02-27 11:50:01,132][00394] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 11:50:01,133][00394] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-27 11:50:01,175][00394] Starting all processes... +[2023-02-27 11:50:01,177][00394] Starting process learner_proc0 +[2023-02-27 11:50:01,386][00394] Starting all processes... +[2023-02-27 11:50:01,398][00394] Starting process inference_proc0-0 +[2023-02-27 11:50:01,399][00394] Starting process rollout_proc0 +[2023-02-27 11:50:01,399][00394] Starting process rollout_proc1 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc2 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc3 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc4 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc5 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc6 +[2023-02-27 11:50:01,422][00394] Starting process rollout_proc7 +[2023-02-27 11:50:10,855][37536] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 11:50:10,862][37536] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-27 11:50:10,936][37536] Num visible devices: 1 +[2023-02-27 11:50:10,962][37536] Starting seed is not provided +[2023-02-27 11:50:10,963][37536] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 11:50:10,963][37536] Initializing actor-critic model on device cuda:0 +[2023-02-27 11:50:10,964][37536] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 11:50:10,976][37536] RunningMeanStd input shape: (1,) +[2023-02-27 11:50:11,097][37536] ConvEncoder: input_channels=3 +[2023-02-27 11:50:11,686][37554] Worker 0 uses CPU cores [0] +[2023-02-27 11:50:11,860][37536] Conv encoder output size: 512 +[2023-02-27 11:50:11,872][37536] Policy head output size: 512 +[2023-02-27 11:50:11,970][37536] Created Actor Critic model with architecture: +[2023-02-27 11:50:11,982][37536] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-27 11:50:12,372][37555] Worker 1 uses CPU cores [1] +[2023-02-27 11:50:12,827][37566] Worker 2 uses CPU cores [0] +[2023-02-27 11:50:13,010][37564] Worker 4 uses CPU cores [0] +[2023-02-27 11:50:13,112][37556] Worker 3 uses CPU cores [1] +[2023-02-27 11:50:13,130][37558] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 11:50:13,134][37558] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-27 11:50:13,195][37558] Num visible devices: 1 +[2023-02-27 11:50:13,342][37569] Worker 5 uses CPU cores [1] +[2023-02-27 11:50:13,518][37577] Worker 6 uses CPU cores [0] +[2023-02-27 11:50:13,569][37576] Worker 7 uses CPU cores [1] +[2023-02-27 11:50:20,445][37536] Using optimizer +[2023-02-27 11:50:20,446][37536] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2023-02-27 11:50:20,483][37536] Loading model from checkpoint +[2023-02-27 11:50:20,488][37536] Loaded experiment state at self.train_step=1955, self.env_steps=8007680 +[2023-02-27 11:50:20,489][37536] Initialized policy 0 weights for model version 1955 +[2023-02-27 11:50:20,493][37536] LearnerWorker_p0 finished initialization! +[2023-02-27 11:50:20,494][37536] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 11:50:20,693][37558] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 11:50:20,694][37558] RunningMeanStd input shape: (1,) +[2023-02-27 11:50:20,706][37558] ConvEncoder: input_channels=3 +[2023-02-27 11:50:20,815][37558] Conv encoder output size: 512 +[2023-02-27 11:50:20,816][37558] Policy head output size: 512 +[2023-02-27 11:50:21,121][00394] Heartbeat connected on Batcher_0 +[2023-02-27 11:50:21,129][00394] Heartbeat connected on LearnerWorker_p0 +[2023-02-27 11:50:21,142][00394] Heartbeat connected on RolloutWorker_w0 +[2023-02-27 11:50:21,147][00394] Heartbeat connected on RolloutWorker_w1 +[2023-02-27 11:50:21,151][00394] Heartbeat connected on RolloutWorker_w2 +[2023-02-27 11:50:21,156][00394] Heartbeat connected on RolloutWorker_w3 +[2023-02-27 11:50:21,160][00394] Heartbeat connected on RolloutWorker_w4 +[2023-02-27 11:50:21,165][00394] Heartbeat connected on RolloutWorker_w5 +[2023-02-27 11:50:21,170][00394] Heartbeat connected on RolloutWorker_w6 +[2023-02-27 11:50:21,175][00394] Heartbeat connected on RolloutWorker_w7 +[2023-02-27 11:50:22,230][00394] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 8007680. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 11:50:23,256][00394] Inference worker 0-0 is ready! +[2023-02-27 11:50:23,258][00394] All inference workers are ready! Signal rollout workers to start! +[2023-02-27 11:50:23,262][00394] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-27 11:50:23,404][37576] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,401][37566] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,407][37555] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,408][37554] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,421][37564] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,425][37577] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,431][37556] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:23,429][37569] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 11:50:24,404][37555] Decorrelating experience for 0 frames... +[2023-02-27 11:50:24,405][37576] Decorrelating experience for 0 frames... +[2023-02-27 11:50:24,754][37566] Decorrelating experience for 0 frames... +[2023-02-27 11:50:24,756][37564] Decorrelating experience for 0 frames... +[2023-02-27 11:50:24,761][37554] Decorrelating experience for 0 frames... +[2023-02-27 11:50:25,350][37576] Decorrelating experience for 32 frames... +[2023-02-27 11:50:25,365][37569] Decorrelating experience for 0 frames... +[2023-02-27 11:50:25,601][37555] Decorrelating experience for 32 frames... +[2023-02-27 11:50:25,967][37564] Decorrelating experience for 32 frames... +[2023-02-27 11:50:25,979][37554] Decorrelating experience for 32 frames... +[2023-02-27 11:50:25,985][37566] Decorrelating experience for 32 frames... +[2023-02-27 11:50:26,201][37555] Decorrelating experience for 64 frames... +[2023-02-27 11:50:26,736][37577] Decorrelating experience for 0 frames... +[2023-02-27 11:50:27,153][37564] Decorrelating experience for 64 frames... +[2023-02-27 11:50:27,192][37554] Decorrelating experience for 64 frames... +[2023-02-27 11:50:27,230][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8007680. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 11:50:27,248][37569] Decorrelating experience for 32 frames... +[2023-02-27 11:50:27,266][37556] Decorrelating experience for 0 frames... +[2023-02-27 11:50:27,708][37555] Decorrelating experience for 96 frames... +[2023-02-27 11:50:27,997][37577] Decorrelating experience for 32 frames... +[2023-02-27 11:50:28,085][37566] Decorrelating experience for 64 frames... +[2023-02-27 11:50:28,540][37576] Decorrelating experience for 64 frames... +[2023-02-27 11:50:28,959][37554] Decorrelating experience for 96 frames... +[2023-02-27 11:50:29,157][37577] Decorrelating experience for 64 frames... +[2023-02-27 11:50:29,847][37564] Decorrelating experience for 96 frames... +[2023-02-27 11:50:30,018][37577] Decorrelating experience for 96 frames... +[2023-02-27 11:50:30,086][37556] Decorrelating experience for 32 frames... +[2023-02-27 11:50:30,379][37569] Decorrelating experience for 64 frames... +[2023-02-27 11:50:30,835][37566] Decorrelating experience for 96 frames... +[2023-02-27 11:50:31,622][37576] Decorrelating experience for 96 frames... +[2023-02-27 11:50:32,098][37556] Decorrelating experience for 64 frames... +[2023-02-27 11:50:32,188][37569] Decorrelating experience for 96 frames... +[2023-02-27 11:50:32,231][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8007680. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 11:50:32,795][37556] Decorrelating experience for 96 frames... +[2023-02-27 11:50:37,230][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8007680. Throughput: 0: 65.1. Samples: 976. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 11:50:37,238][00394] Avg episode reward: [(0, '2.115')] +[2023-02-27 11:50:38,312][37536] Signal inference workers to stop experience collection... +[2023-02-27 11:50:38,355][37558] InferenceWorker_p0-w0: stopping experience collection +[2023-02-27 11:50:42,231][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8007680. Throughput: 0: 130.7. Samples: 2614. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 11:50:42,312][00394] Avg episode reward: [(0, '2.333')] +[2023-02-27 11:50:44,270][37536] Signal inference workers to resume experience collection... +[2023-02-27 11:50:44,292][37558] InferenceWorker_p0-w0: resuming experience collection +[2023-02-27 11:50:47,274][00394] Fps is (10 sec: 409.5, 60 sec: 163.8, 300 sec: 163.8). Total num frames: 8011776. Throughput: 0: 104.6. Samples: 2614. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-02-27 11:50:47,444][00394] Avg episode reward: [(0, '3.152')] +[2023-02-27 11:50:52,266][00394] Fps is (10 sec: 819.2, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 8015872. Throughput: 0: 120.7. Samples: 3620. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2023-02-27 11:50:52,470][00394] Avg episode reward: [(0, '3.152')] +[2023-02-27 11:50:57,240][00394] Fps is (10 sec: 1228.2, 60 sec: 468.0, 300 sec: 468.0). Total num frames: 8024064. Throughput: 0: 131.6. Samples: 4608. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 11:50:57,301][00394] Avg episode reward: [(0, '3.550')] +[2023-02-27 11:51:02,232][00394] Fps is (10 sec: 1638.3, 60 sec: 614.4, 300 sec: 614.4). Total num frames: 8032256. Throughput: 0: 148.6. Samples: 5946. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 11:51:02,275][00394] Avg episode reward: [(0, '4.166')] +[2023-02-27 11:51:06,448][37558] Updated weights for policy 0, policy_version 1965 (0.0128) +[2023-02-27 11:51:07,230][00394] Fps is (10 sec: 2459.2, 60 sec: 910.2, 300 sec: 910.2). Total num frames: 8048640. Throughput: 0: 212.8. Samples: 9578. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 11:51:07,253][00394] Avg episode reward: [(0, '4.601')] +[2023-02-27 11:51:12,230][00394] Fps is (10 sec: 4096.4, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 8073216. Throughput: 0: 358.1. Samples: 16114. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 11:51:12,232][00394] Avg episode reward: [(0, '5.238')] +[2023-02-27 11:51:17,235][00394] Fps is (10 sec: 3686.4, 60 sec: 1415.0, 300 sec: 1415.0). Total num frames: 8085504. Throughput: 0: 429.0. Samples: 19304. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 11:51:17,408][00394] Avg episode reward: [(0, '5.073')] +[2023-02-27 11:51:18,826][37558] Updated weights for policy 0, policy_version 1975 (0.0037) +[2023-02-27 11:51:22,234][00394] Fps is (10 sec: 2457.6, 60 sec: 1501.9, 300 sec: 1501.9). Total num frames: 8097792. Throughput: 0: 482.6. Samples: 22694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:51:22,329][00394] Avg episode reward: [(0, '5.126')] +[2023-02-27 11:51:27,250][00394] Fps is (10 sec: 2047.5, 60 sec: 1638.3, 300 sec: 1512.3). Total num frames: 8105984. Throughput: 0: 518.4. Samples: 25942. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 11:51:27,394][00394] Avg episode reward: [(0, '4.843')] +[2023-02-27 11:51:32,232][00394] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1579.9). Total num frames: 8118272. Throughput: 0: 553.7. Samples: 27530. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2023-02-27 11:51:32,300][00394] Avg episode reward: [(0, '4.578')] +[2023-02-27 11:51:34,950][37558] Updated weights for policy 0, policy_version 1985 (0.0178) +[2023-02-27 11:51:37,230][00394] Fps is (10 sec: 3277.6, 60 sec: 2184.5, 300 sec: 1747.6). Total num frames: 8138752. Throughput: 0: 618.9. Samples: 31472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:51:37,252][00394] Avg episode reward: [(0, '4.323')] +[2023-02-27 11:51:42,230][00394] Fps is (10 sec: 4096.1, 60 sec: 2525.9, 300 sec: 1894.4). Total num frames: 8159232. Throughput: 0: 739.6. Samples: 37884. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 11:51:42,232][00394] Avg episode reward: [(0, '4.572')] +[2023-02-27 11:51:47,233][00394] Fps is (10 sec: 2866.2, 60 sec: 2594.1, 300 sec: 1879.3). Total num frames: 8167424. Throughput: 0: 774.5. Samples: 40800. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 11:51:47,345][00394] Avg episode reward: [(0, '4.701')] +[2023-02-27 11:51:47,383][37558] Updated weights for policy 0, policy_version 1995 (0.0046) +[2023-02-27 11:51:52,242][00394] Fps is (10 sec: 2047.9, 60 sec: 2730.7, 300 sec: 1911.5). Total num frames: 8179712. Throughput: 0: 761.4. Samples: 43842. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:51:52,411][00394] Avg episode reward: [(0, '4.798')] +[2023-02-27 11:51:57,235][00394] Fps is (10 sec: 2458.3, 60 sec: 2799.2, 300 sec: 1940.2). Total num frames: 8192000. Throughput: 0: 687.5. Samples: 47052. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 11:51:57,341][00394] Avg episode reward: [(0, '4.876')] +[2023-02-27 11:51:58,806][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002001_8196096.pth... +[2023-02-27 11:51:59,372][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001953_7999488.pth +[2023-02-27 11:52:02,230][00394] Fps is (10 sec: 2867.3, 60 sec: 2935.5, 300 sec: 2007.0). Total num frames: 8208384. Throughput: 0: 652.8. Samples: 48678. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 11:52:02,257][00394] Avg episode reward: [(0, '4.689')] +[2023-02-27 11:52:03,041][37558] Updated weights for policy 0, policy_version 2005 (0.0133) +[2023-02-27 11:52:07,230][00394] Fps is (10 sec: 3686.5, 60 sec: 3003.7, 300 sec: 2106.5). Total num frames: 8228864. Throughput: 0: 698.0. Samples: 54106. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:52:07,239][00394] Avg episode reward: [(0, '4.837')] +[2023-02-27 11:52:12,230][00394] Fps is (10 sec: 3686.4, 60 sec: 2867.2, 300 sec: 2159.7). Total num frames: 8245248. Throughput: 0: 764.6. Samples: 60346. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 11:52:12,249][00394] Avg episode reward: [(0, '4.879')] +[2023-02-27 11:52:16,160][37558] Updated weights for policy 0, policy_version 2015 (0.0046) +[2023-02-27 11:52:17,233][00394] Fps is (10 sec: 2457.7, 60 sec: 2798.9, 300 sec: 2137.0). Total num frames: 8253440. Throughput: 0: 764.9. Samples: 61952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:52:17,329][00394] Avg episode reward: [(0, '4.743')] +[2023-02-27 11:52:22,237][00394] Fps is (10 sec: 2048.0, 60 sec: 2798.9, 300 sec: 2150.4). Total num frames: 8265728. Throughput: 0: 743.7. Samples: 64938. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-27 11:52:22,350][00394] Avg episode reward: [(0, '4.757')] +[2023-02-27 11:52:27,252][00394] Fps is (10 sec: 2457.1, 60 sec: 2867.2, 300 sec: 2162.7). Total num frames: 8278016. Throughput: 0: 680.2. Samples: 68496. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 11:52:27,389][00394] Avg episode reward: [(0, '4.860')] +[2023-02-27 11:52:30,626][37558] Updated weights for policy 0, policy_version 2025 (0.0181) +[2023-02-27 11:52:32,230][00394] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 2237.0). Total num frames: 8298496. Throughput: 0: 665.0. Samples: 70724. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2023-02-27 11:52:32,250][00394] Avg episode reward: [(0, '5.047')] +[2023-02-27 11:52:37,230][00394] Fps is (10 sec: 4096.8, 60 sec: 3003.7, 300 sec: 2305.9). Total num frames: 8318976. Throughput: 0: 736.5. Samples: 76984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:52:37,238][00394] Avg episode reward: [(0, '5.365')] +[2023-02-27 11:52:42,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2798.9, 300 sec: 2282.1). Total num frames: 8327168. Throughput: 0: 775.1. Samples: 81930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:52:42,278][00394] Avg episode reward: [(0, '5.340')] +[2023-02-27 11:52:43,872][37558] Updated weights for policy 0, policy_version 2035 (0.0046) +[2023-02-27 11:52:47,237][00394] Fps is (10 sec: 2455.8, 60 sec: 2935.3, 300 sec: 2316.2). Total num frames: 8343552. Throughput: 0: 770.2. Samples: 83342. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 11:52:47,299][00394] Avg episode reward: [(0, '5.256')] +[2023-02-27 11:52:52,236][00394] Fps is (10 sec: 2456.9, 60 sec: 2867.1, 300 sec: 2293.7). Total num frames: 8351744. Throughput: 0: 722.3. Samples: 86612. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 11:52:52,316][00394] Avg episode reward: [(0, '5.291')] +[2023-02-27 11:52:57,230][00394] Fps is (10 sec: 2869.3, 60 sec: 3003.8, 300 sec: 2351.9). Total num frames: 8372224. Throughput: 0: 685.7. Samples: 91204. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 11:52:57,258][00394] Avg episode reward: [(0, '5.007')] +[2023-02-27 11:52:57,821][37558] Updated weights for policy 0, policy_version 2045 (0.0145) +[2023-02-27 11:53:02,230][00394] Fps is (10 sec: 4097.1, 60 sec: 3072.0, 300 sec: 2406.4). Total num frames: 8392704. Throughput: 0: 722.0. Samples: 94444. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 11:53:02,233][00394] Avg episode reward: [(0, '5.105')] +[2023-02-27 11:53:07,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2935.5, 300 sec: 2408.0). Total num frames: 8404992. Throughput: 0: 777.8. Samples: 99938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:53:07,304][00394] Avg episode reward: [(0, '5.097')] +[2023-02-27 11:53:11,764][37558] Updated weights for policy 0, policy_version 2055 (0.0137) +[2023-02-27 11:53:12,230][00394] Fps is (10 sec: 2457.5, 60 sec: 2867.2, 300 sec: 2409.4). Total num frames: 8417280. Throughput: 0: 766.3. Samples: 102978. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2023-02-27 11:53:12,281][00394] Avg episode reward: [(0, '5.363')] +[2023-02-27 11:53:17,251][00394] Fps is (10 sec: 2046.8, 60 sec: 2866.9, 300 sec: 2387.3). Total num frames: 8425472. Throughput: 0: 751.4. Samples: 104540. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:53:17,350][00394] Avg episode reward: [(0, '5.359')] +[2023-02-27 11:53:22,235][00394] Fps is (10 sec: 2457.6, 60 sec: 2935.5, 300 sec: 2412.1). Total num frames: 8441856. Throughput: 0: 697.3. Samples: 108362. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 11:53:22,286][00394] Avg episode reward: [(0, '5.245')] +[2023-02-27 11:53:25,593][37558] Updated weights for policy 0, policy_version 2065 (0.0094) +[2023-02-27 11:53:27,230][00394] Fps is (10 sec: 3688.4, 60 sec: 3072.1, 300 sec: 2457.6). Total num frames: 8462336. Throughput: 0: 713.1. Samples: 114018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:53:27,233][00394] Avg episode reward: [(0, '5.286')] +[2023-02-27 11:53:32,237][00394] Fps is (10 sec: 3683.8, 60 sec: 3003.4, 300 sec: 2479.1). Total num frames: 8478720. Throughput: 0: 753.2. Samples: 117234. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:53:32,423][00394] Avg episode reward: [(0, '5.225')] +[2023-02-27 11:53:37,251][00394] Fps is (10 sec: 2867.0, 60 sec: 2867.1, 300 sec: 2478.6). Total num frames: 8491008. Throughput: 0: 758.5. Samples: 120742. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 11:53:37,329][00394] Avg episode reward: [(0, '5.297')] +[2023-02-27 11:53:41,060][37558] Updated weights for policy 0, policy_version 2075 (0.0106) +[2023-02-27 11:53:42,245][00394] Fps is (10 sec: 2049.1, 60 sec: 2867.1, 300 sec: 2457.6). Total num frames: 8499200. Throughput: 0: 731.6. Samples: 124128. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-27 11:53:42,352][00394] Avg episode reward: [(0, '5.096')] +[2023-02-27 11:53:47,235][00394] Fps is (10 sec: 2048.2, 60 sec: 2799.3, 300 sec: 2457.6). Total num frames: 8511488. Throughput: 0: 702.3. Samples: 126048. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 11:53:47,361][00394] Avg episode reward: [(0, '5.220')] +[2023-02-27 11:53:52,230][00394] Fps is (10 sec: 3687.1, 60 sec: 3072.1, 300 sec: 2516.1). Total num frames: 8536064. Throughput: 0: 681.2. Samples: 130590. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 11:53:52,257][00394] Avg episode reward: [(0, '5.050')] +[2023-02-27 11:53:53,055][37558] Updated weights for policy 0, policy_version 2085 (0.0122) +[2023-02-27 11:53:57,235][00394] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 2533.8). Total num frames: 8552448. Throughput: 0: 755.9. Samples: 136994. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:53:57,256][00394] Avg episode reward: [(0, '4.961')] +[2023-02-27 11:53:59,074][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002089_8556544.pth... +[2023-02-27 11:53:59,613][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth +[2023-02-27 11:54:02,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2798.9, 300 sec: 2513.5). Total num frames: 8560640. Throughput: 0: 754.9. Samples: 138506. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 11:54:02,328][00394] Avg episode reward: [(0, '4.816')] +[2023-02-27 11:54:07,241][00394] Fps is (10 sec: 2048.1, 60 sec: 2798.9, 300 sec: 2512.2). Total num frames: 8572928. Throughput: 0: 733.8. Samples: 141382. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) +[2023-02-27 11:54:07,330][00394] Avg episode reward: [(0, '4.872')] +[2023-02-27 11:54:11,016][37558] Updated weights for policy 0, policy_version 2095 (0.0122) +[2023-02-27 11:54:12,255][00394] Fps is (10 sec: 2047.9, 60 sec: 2730.6, 300 sec: 2493.2). Total num frames: 8581120. Throughput: 0: 683.4. Samples: 144772. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-27 11:54:12,494][00394] Avg episode reward: [(0, '4.778')] +[2023-02-27 11:54:17,230][00394] Fps is (10 sec: 2867.1, 60 sec: 2935.7, 300 sec: 2527.3). Total num frames: 8601600. Throughput: 0: 654.9. Samples: 146700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 11:54:17,300][00394] Avg episode reward: [(0, '4.929')] +[2023-02-27 11:54:21,778][37558] Updated weights for policy 0, policy_version 2105 (0.0039) +[2023-02-27 11:54:22,230][00394] Fps is (10 sec: 4096.3, 60 sec: 3003.8, 300 sec: 2560.0). Total num frames: 8622080. Throughput: 0: 706.0. Samples: 152512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:54:22,235][00394] Avg episode reward: [(0, '5.038')] +[2023-02-27 11:54:27,250][00394] Fps is (10 sec: 3276.9, 60 sec: 2867.2, 300 sec: 2557.9). Total num frames: 8634368. Throughput: 0: 758.3. Samples: 158252. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:54:27,418][00394] Avg episode reward: [(0, '4.775')] +[2023-02-27 11:54:32,233][00394] Fps is (10 sec: 1638.3, 60 sec: 2662.7, 300 sec: 2523.1). Total num frames: 8638464. Throughput: 0: 738.1. Samples: 159264. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:54:32,312][00394] Avg episode reward: [(0, '4.764')] +[2023-02-27 11:54:37,236][00394] Fps is (10 sec: 1638.4, 60 sec: 2662.4, 300 sec: 2521.8). Total num frames: 8650752. Throughput: 0: 676.2. Samples: 161020. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 11:54:37,294][00394] Avg episode reward: [(0, '4.841')] +[2023-02-27 11:54:41,781][37558] Updated weights for policy 0, policy_version 2115 (0.0143) +[2023-02-27 11:54:42,233][00394] Fps is (10 sec: 2457.7, 60 sec: 2730.8, 300 sec: 2520.6). Total num frames: 8663040. Throughput: 0: 616.0. Samples: 164712. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2023-02-27 11:54:42,301][00394] Avg episode reward: [(0, '4.694')] +[2023-02-27 11:54:47,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2550.3). Total num frames: 8683520. Throughput: 0: 635.1. Samples: 167086. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:54:47,258][00394] Avg episode reward: [(0, '4.942')] +[2023-02-27 11:54:51,176][37558] Updated weights for policy 0, policy_version 2125 (0.0063) +[2023-02-27 11:54:52,230][00394] Fps is (10 sec: 4505.6, 60 sec: 2867.2, 300 sec: 2594.1). Total num frames: 8708096. Throughput: 0: 714.8. Samples: 173546. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:54:52,235][00394] Avg episode reward: [(0, '4.947')] +[2023-02-27 11:54:57,246][00394] Fps is (10 sec: 2867.1, 60 sec: 2662.4, 300 sec: 2561.9). Total num frames: 8712192. Throughput: 0: 730.8. Samples: 177658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:54:57,354][00394] Avg episode reward: [(0, '4.804')] +[2023-02-27 11:55:02,234][00394] Fps is (10 sec: 1638.2, 60 sec: 2730.6, 300 sec: 2560.0). Total num frames: 8724480. Throughput: 0: 721.4. Samples: 179162. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 11:55:02,334][00394] Avg episode reward: [(0, '4.853')] +[2023-02-27 11:55:07,235][00394] Fps is (10 sec: 2457.7, 60 sec: 2730.7, 300 sec: 2558.2). Total num frames: 8736768. Throughput: 0: 666.1. Samples: 182488. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:55:07,296][00394] Avg episode reward: [(0, '4.786')] +[2023-02-27 11:55:09,829][37558] Updated weights for policy 0, policy_version 2135 (0.0122) +[2023-02-27 11:55:12,230][00394] Fps is (10 sec: 3277.2, 60 sec: 2935.5, 300 sec: 2584.7). Total num frames: 8757248. Throughput: 0: 636.6. Samples: 186898. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2023-02-27 11:55:12,252][00394] Avg episode reward: [(0, '4.689')] +[2023-02-27 11:55:17,230][00394] Fps is (10 sec: 4096.0, 60 sec: 2935.5, 300 sec: 2610.3). Total num frames: 8777728. Throughput: 0: 685.2. Samples: 190098. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 11:55:17,240][00394] Avg episode reward: [(0, '4.512')] +[2023-02-27 11:55:18,917][37558] Updated weights for policy 0, policy_version 2145 (0.0030) +[2023-02-27 11:55:22,244][00394] Fps is (10 sec: 2867.2, 60 sec: 2730.7, 300 sec: 2638.1). Total num frames: 8785920. Throughput: 0: 781.0. Samples: 196164. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 11:55:22,449][00394] Avg episode reward: [(0, '4.689')] +[2023-02-27 11:55:27,236][00394] Fps is (10 sec: 1638.4, 60 sec: 2662.4, 300 sec: 2665.9). Total num frames: 8794112. Throughput: 0: 732.6. Samples: 197678. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:55:27,337][00394] Avg episode reward: [(0, '4.732')] +[2023-02-27 11:55:32,251][00394] Fps is (10 sec: 1638.4, 60 sec: 2730.7, 300 sec: 2693.6). Total num frames: 8802304. Throughput: 0: 713.8. Samples: 199206. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 11:55:32,404][00394] Avg episode reward: [(0, '4.780')] +[2023-02-27 11:55:37,250][00394] Fps is (10 sec: 2048.0, 60 sec: 2730.7, 300 sec: 2735.3). Total num frames: 8814592. Throughput: 0: 636.0. Samples: 202166. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 11:55:37,346][00394] Avg episode reward: [(0, '4.881')] +[2023-02-27 11:55:40,154][37558] Updated weights for policy 0, policy_version 2155 (0.0291) +[2023-02-27 11:55:42,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2790.9). Total num frames: 8835072. Throughput: 0: 639.5. Samples: 206434. Policy #0 lag: (min: 1.0, avg: 1.2, max: 3.0) +[2023-02-27 11:55:42,254][00394] Avg episode reward: [(0, '5.001')] +[2023-02-27 11:55:47,230][00394] Fps is (10 sec: 4096.1, 60 sec: 2867.2, 300 sec: 2846.4). Total num frames: 8855552. Throughput: 0: 677.1. Samples: 209630. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:55:47,232][00394] Avg episode reward: [(0, '4.742')] +[2023-02-27 11:55:52,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2594.1, 300 sec: 2846.4). Total num frames: 8863744. Throughput: 0: 732.9. Samples: 215468. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:55:52,370][00394] Avg episode reward: [(0, '4.640')] +[2023-02-27 11:55:52,638][37558] Updated weights for policy 0, policy_version 2165 (0.0036) +[2023-02-27 11:55:57,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2798.9, 300 sec: 2874.2). Total num frames: 8880128. Throughput: 0: 704.1. Samples: 218582. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:55:57,279][00394] Avg episode reward: [(0, '4.524')] +[2023-02-27 11:55:57,301][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002168_8880128.pth... +[2023-02-27 11:55:57,954][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002001_8196096.pth +[2023-02-27 11:56:02,232][00394] Fps is (10 sec: 2457.5, 60 sec: 2730.7, 300 sec: 2846.4). Total num frames: 8888320. Throughput: 0: 667.3. Samples: 220126. Policy #0 lag: (min: 0.0, avg: 1.0, max: 1.0) +[2023-02-27 11:56:02,311][00394] Avg episode reward: [(0, '4.785')] +[2023-02-27 11:56:07,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2798.9, 300 sec: 2818.6). Total num frames: 8904704. Throughput: 0: 612.4. Samples: 223724. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0) +[2023-02-27 11:56:07,281][00394] Avg episode reward: [(0, '4.787')] +[2023-02-27 11:56:07,994][37558] Updated weights for policy 0, policy_version 2175 (0.0141) +[2023-02-27 11:56:12,230][00394] Fps is (10 sec: 3686.5, 60 sec: 2798.9, 300 sec: 2846.4). Total num frames: 8925184. Throughput: 0: 705.7. Samples: 229434. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 11:56:12,232][00394] Avg episode reward: [(0, '5.033')] +[2023-02-27 11:56:17,231][00394] Fps is (10 sec: 3685.8, 60 sec: 2730.6, 300 sec: 2860.2). Total num frames: 8941568. Throughput: 0: 743.0. Samples: 232642. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:56:17,305][00394] Avg episode reward: [(0, '4.802')] +[2023-02-27 11:56:20,838][37558] Updated weights for policy 0, policy_version 2185 (0.0056) +[2023-02-27 11:56:22,240][00394] Fps is (10 sec: 2457.6, 60 sec: 2730.7, 300 sec: 2860.3). Total num frames: 8949760. Throughput: 0: 761.2. Samples: 236420. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:56:22,400][00394] Avg episode reward: [(0, '4.790')] +[2023-02-27 11:56:27,236][00394] Fps is (10 sec: 1637.6, 60 sec: 2730.4, 300 sec: 2846.3). Total num frames: 8957952. Throughput: 0: 734.2. Samples: 239478. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 11:56:27,364][00394] Avg episode reward: [(0, '4.595')] +[2023-02-27 11:56:32,232][00394] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 2832.5). Total num frames: 8974336. Throughput: 0: 700.3. Samples: 241142. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 11:56:32,289][00394] Avg episode reward: [(0, '4.468')] +[2023-02-27 11:56:36,291][37558] Updated weights for policy 0, policy_version 2195 (0.0199) +[2023-02-27 11:56:37,230][00394] Fps is (10 sec: 3278.9, 60 sec: 2935.5, 300 sec: 2818.6). Total num frames: 8990720. Throughput: 0: 669.3. Samples: 245586. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:56:37,262][00394] Avg episode reward: [(0, '4.389')] +[2023-02-27 11:56:42,230][00394] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 2874.2). Total num frames: 9015296. Throughput: 0: 735.6. Samples: 251686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:56:42,240][00394] Avg episode reward: [(0, '4.435')] +[2023-02-27 11:56:47,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2730.7, 300 sec: 2846.4). Total num frames: 9019392. Throughput: 0: 750.0. Samples: 253876. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:56:47,289][00394] Avg episode reward: [(0, '4.552')] +[2023-02-27 11:56:52,252][00394] Fps is (10 sec: 1228.8, 60 sec: 2730.7, 300 sec: 2832.5). Total num frames: 9027584. Throughput: 0: 709.8. Samples: 255666. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 11:56:52,379][00394] Avg episode reward: [(0, '4.743')] +[2023-02-27 11:56:53,925][37558] Updated weights for policy 0, policy_version 2205 (0.0066) +[2023-02-27 11:56:57,237][00394] Fps is (10 sec: 2048.0, 60 sec: 2662.4, 300 sec: 2818.6). Total num frames: 9039872. Throughput: 0: 654.3. Samples: 258876. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 11:56:57,297][00394] Avg episode reward: [(0, '4.831')] +[2023-02-27 11:57:02,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2798.9, 300 sec: 2804.7). Total num frames: 9056256. Throughput: 0: 621.7. Samples: 260618. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2023-02-27 11:57:02,264][00394] Avg episode reward: [(0, '4.978')] +[2023-02-27 11:57:06,002][37558] Updated weights for policy 0, policy_version 2215 (0.0098) +[2023-02-27 11:57:07,230][00394] Fps is (10 sec: 3686.5, 60 sec: 2867.2, 300 sec: 2818.6). Total num frames: 9076736. Throughput: 0: 658.1. Samples: 266036. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:57:07,234][00394] Avg episode reward: [(0, '5.271')] +[2023-02-27 11:57:12,231][00394] Fps is (10 sec: 3686.0, 60 sec: 2798.9, 300 sec: 2846.4). Total num frames: 9093120. Throughput: 0: 727.2. Samples: 272200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:57:12,307][00394] Avg episode reward: [(0, '5.327')] +[2023-02-27 11:57:17,267][00394] Fps is (10 sec: 2457.6, 60 sec: 2662.5, 300 sec: 2832.5). Total num frames: 9101312. Throughput: 0: 726.9. Samples: 273854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:57:17,386][00394] Avg episode reward: [(0, '5.151')] +[2023-02-27 11:57:21,701][37558] Updated weights for policy 0, policy_version 2225 (0.0094) +[2023-02-27 11:57:22,230][00394] Fps is (10 sec: 2048.2, 60 sec: 2730.7, 300 sec: 2832.5). Total num frames: 9113600. Throughput: 0: 696.5. Samples: 276928. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-27 11:57:22,305][00394] Avg episode reward: [(0, '4.904')] +[2023-02-27 11:57:27,233][00394] Fps is (10 sec: 2457.6, 60 sec: 2799.2, 300 sec: 2804.7). Total num frames: 9125888. Throughput: 0: 640.0. Samples: 280486. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2023-02-27 11:57:27,294][00394] Avg episode reward: [(0, '5.004')] +[2023-02-27 11:57:32,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2804.7). Total num frames: 9146368. Throughput: 0: 636.6. Samples: 282524. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 11:57:32,252][00394] Avg episode reward: [(0, '5.268')] +[2023-02-27 11:57:34,113][37558] Updated weights for policy 0, policy_version 2235 (0.0104) +[2023-02-27 11:57:37,235][00394] Fps is (10 sec: 4093.7, 60 sec: 2935.2, 300 sec: 2846.3). Total num frames: 9166848. Throughput: 0: 733.2. Samples: 288664. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 11:57:37,238][00394] Avg episode reward: [(0, '5.193')] +[2023-02-27 11:57:42,234][00394] Fps is (10 sec: 2866.1, 60 sec: 2662.2, 300 sec: 2818.6). Total num frames: 9175040. Throughput: 0: 752.8. Samples: 292754. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:57:42,293][00394] Avg episode reward: [(0, '5.279')] +[2023-02-27 11:57:47,236][00394] Fps is (10 sec: 2047.8, 60 sec: 2798.6, 300 sec: 2832.5). Total num frames: 9187328. Throughput: 0: 749.2. Samples: 294336. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:57:47,334][00394] Avg episode reward: [(0, '5.469')] +[2023-02-27 11:57:51,151][37558] Updated weights for policy 0, policy_version 2245 (0.0121) +[2023-02-27 11:57:52,252][00394] Fps is (10 sec: 2048.4, 60 sec: 2798.8, 300 sec: 2790.8). Total num frames: 9195520. Throughput: 0: 704.3. Samples: 297732. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:57:52,426][00394] Avg episode reward: [(0, '5.653')] +[2023-02-27 11:57:53,425][37536] Saving new best policy, reward=5.653! +[2023-02-27 11:57:57,230][00394] Fps is (10 sec: 2459.2, 60 sec: 2867.2, 300 sec: 2776.9). Total num frames: 9211904. Throughput: 0: 640.9. Samples: 301042. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2023-02-27 11:57:57,301][00394] Avg episode reward: [(0, '5.969')] +[2023-02-27 11:57:57,315][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002249_9211904.pth... +[2023-02-27 11:57:57,738][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002089_8556544.pth +[2023-02-27 11:57:57,746][37536] Saving new best policy, reward=5.969! +[2023-02-27 11:58:02,230][00394] Fps is (10 sec: 3277.5, 60 sec: 2867.2, 300 sec: 2790.8). Total num frames: 9228288. Throughput: 0: 654.5. Samples: 303308. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 11:58:02,254][00394] Avg episode reward: [(0, '5.989')] +[2023-02-27 11:58:02,257][37536] Saving new best policy, reward=5.989! +[2023-02-27 11:58:04,218][37558] Updated weights for policy 0, policy_version 2255 (0.0099) +[2023-02-27 11:58:07,236][00394] Fps is (10 sec: 3276.4, 60 sec: 2798.9, 300 sec: 2804.7). Total num frames: 9244672. Throughput: 0: 708.6. Samples: 308818. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 11:58:07,325][00394] Avg episode reward: [(0, '5.814')] +[2023-02-27 11:58:12,236][00394] Fps is (10 sec: 2456.4, 60 sec: 2662.2, 300 sec: 2804.7). Total num frames: 9252864. Throughput: 0: 696.7. Samples: 311842. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 11:58:12,324][00394] Avg episode reward: [(0, '6.029')] +[2023-02-27 11:58:12,338][37536] Saving new best policy, reward=6.029! +[2023-02-27 11:58:17,239][00394] Fps is (10 sec: 1638.6, 60 sec: 2662.4, 300 sec: 2777.0). Total num frames: 9261056. Throughput: 0: 685.1. Samples: 313354. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 11:58:17,392][00394] Avg episode reward: [(0, '5.923')] +[2023-02-27 11:58:22,249][00394] Fps is (10 sec: 1639.0, 60 sec: 2594.1, 300 sec: 2735.3). Total num frames: 9269248. Throughput: 0: 615.8. Samples: 316374. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:58:22,438][00394] Avg episode reward: [(0, '5.821')] +[2023-02-27 11:58:24,377][37558] Updated weights for policy 0, policy_version 2265 (0.0133) +[2023-02-27 11:58:27,248][00394] Fps is (10 sec: 2048.0, 60 sec: 2594.1, 300 sec: 2721.5). Total num frames: 9281536. Throughput: 0: 602.9. Samples: 319884. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2023-02-27 11:58:27,436][00394] Avg episode reward: [(0, '5.828')] +[2023-02-27 11:58:32,232][00394] Fps is (10 sec: 3277.1, 60 sec: 2594.1, 300 sec: 2749.2). Total num frames: 9302016. Throughput: 0: 614.7. Samples: 321992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 11:58:32,256][00394] Avg episode reward: [(0, '5.925')] +[2023-02-27 11:58:35,452][37558] Updated weights for policy 0, policy_version 2275 (0.0071) +[2023-02-27 11:58:37,234][00394] Fps is (10 sec: 3686.2, 60 sec: 2526.1, 300 sec: 2777.0). Total num frames: 9318400. Throughput: 0: 670.2. Samples: 327890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 11:58:37,337][00394] Avg episode reward: [(0, '6.120')] +[2023-02-27 11:58:37,429][37536] Saving new best policy, reward=6.120! +[2023-02-27 11:58:42,236][00394] Fps is (10 sec: 2457.6, 60 sec: 2526.0, 300 sec: 2763.1). Total num frames: 9326592. Throughput: 0: 661.1. Samples: 330790. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-27 11:58:42,319][00394] Avg episode reward: [(0, '6.365')] +[2023-02-27 11:58:43,862][37536] Saving new best policy, reward=6.365! +[2023-02-27 11:58:47,230][00394] Fps is (10 sec: 1638.5, 60 sec: 2457.9, 300 sec: 2707.5). Total num frames: 9334784. Throughput: 0: 641.4. Samples: 332170. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 11:58:47,358][00394] Avg episode reward: [(0, '6.538')] +[2023-02-27 11:58:48,761][37536] Saving new best policy, reward=6.538! +[2023-02-27 11:58:52,233][00394] Fps is (10 sec: 2048.0, 60 sec: 2526.0, 300 sec: 2693.6). Total num frames: 9347072. Throughput: 0: 586.4. Samples: 335206. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2023-02-27 11:58:52,285][00394] Avg episode reward: [(0, '6.896')] +[2023-02-27 11:58:52,300][37536] Saving new best policy, reward=6.896! +[2023-02-27 11:58:55,168][37558] Updated weights for policy 0, policy_version 2285 (0.0098) +[2023-02-27 11:58:57,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2594.1, 300 sec: 2735.3). Total num frames: 9367552. Throughput: 0: 614.4. Samples: 339488. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:58:57,250][00394] Avg episode reward: [(0, '7.330')] +[2023-02-27 11:58:57,266][37536] Saving new best policy, reward=7.330! +[2023-02-27 11:59:02,230][00394] Fps is (10 sec: 4096.0, 60 sec: 2662.4, 300 sec: 2763.1). Total num frames: 9388032. Throughput: 0: 648.8. Samples: 342548. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 11:59:02,235][00394] Avg episode reward: [(0, '7.778')] +[2023-02-27 11:59:02,243][37536] Saving new best policy, reward=7.778! +[2023-02-27 11:59:07,240][00394] Fps is (10 sec: 2457.0, 60 sec: 2457.5, 300 sec: 2749.2). Total num frames: 9392128. Throughput: 0: 691.9. Samples: 347510. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 11:59:07,384][00394] Avg episode reward: [(0, '7.842')] +[2023-02-27 11:59:07,795][37536] Saving new best policy, reward=7.842! +[2023-02-27 11:59:09,928][37558] Updated weights for policy 0, policy_version 2295 (0.0084) +[2023-02-27 11:59:12,235][00394] Fps is (10 sec: 1638.4, 60 sec: 2526.1, 300 sec: 2721.4). Total num frames: 9404416. Throughput: 0: 671.8. Samples: 350116. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2023-02-27 11:59:12,337][00394] Avg episode reward: [(0, '7.795')] +[2023-02-27 11:59:17,244][00394] Fps is (10 sec: 2048.3, 60 sec: 2525.8, 300 sec: 2679.7). Total num frames: 9412608. Throughput: 0: 659.4. Samples: 351666. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:59:17,331][00394] Avg episode reward: [(0, '8.083')] +[2023-02-27 11:59:17,432][37536] Saving new best policy, reward=8.083! +[2023-02-27 11:59:22,252][00394] Fps is (10 sec: 2047.9, 60 sec: 2594.2, 300 sec: 2679.8). Total num frames: 9424896. Throughput: 0: 601.7. Samples: 354968. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 11:59:22,342][00394] Avg episode reward: [(0, '8.314')] +[2023-02-27 11:59:22,353][37536] Saving new best policy, reward=8.314! +[2023-02-27 11:59:25,874][37558] Updated weights for policy 0, policy_version 2305 (0.0266) +[2023-02-27 11:59:27,230][00394] Fps is (10 sec: 3277.2, 60 sec: 2730.7, 300 sec: 2735.3). Total num frames: 9445376. Throughput: 0: 634.1. Samples: 359324. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 11:59:27,260][00394] Avg episode reward: [(0, '8.033')] +[2023-02-27 11:59:32,239][00394] Fps is (10 sec: 4096.2, 60 sec: 2730.7, 300 sec: 2763.1). Total num frames: 9465856. Throughput: 0: 668.7. Samples: 362262. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 11:59:32,311][00394] Avg episode reward: [(0, '7.705')] +[2023-02-27 11:59:37,237][00394] Fps is (10 sec: 2457.6, 60 sec: 2525.9, 300 sec: 2735.3). Total num frames: 9469952. Throughput: 0: 702.9. Samples: 366838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 11:59:37,358][00394] Avg episode reward: [(0, '7.461')] +[2023-02-27 11:59:42,233][00394] Fps is (10 sec: 819.1, 60 sec: 2457.6, 300 sec: 2679.7). Total num frames: 9474048. Throughput: 0: 614.1. Samples: 367122. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 11:59:42,277][00394] Avg episode reward: [(0, '7.403')] +[2023-02-27 11:59:46,707][37558] Updated weights for policy 0, policy_version 2315 (0.0096) +[2023-02-27 11:59:47,233][00394] Fps is (10 sec: 1228.4, 60 sec: 2457.5, 300 sec: 2624.2). Total num frames: 9482240. Throughput: 0: 575.1. Samples: 368430. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 11:59:47,303][00394] Avg episode reward: [(0, '7.469')] +[2023-02-27 11:59:52,240][00394] Fps is (10 sec: 2867.4, 60 sec: 2594.1, 300 sec: 2679.8). Total num frames: 9502720. Throughput: 0: 558.7. Samples: 372650. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 11:59:52,288][00394] Avg episode reward: [(0, '7.763')] +[2023-02-27 11:59:57,233][00394] Fps is (10 sec: 3687.1, 60 sec: 2525.8, 300 sec: 2693.6). Total num frames: 9519104. Throughput: 0: 632.8. Samples: 378592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 3.0) +[2023-02-27 11:59:57,277][00394] Avg episode reward: [(0, '8.084')] +[2023-02-27 11:59:57,495][37558] Updated weights for policy 0, policy_version 2325 (0.0129) +[2023-02-27 11:59:57,553][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002325_9523200.pth... +[2023-02-27 11:59:58,292][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002168_8880128.pth +[2023-02-27 12:00:02,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2321.1, 300 sec: 2679.8). Total num frames: 9527296. Throughput: 0: 630.6. Samples: 380044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:00:02,311][00394] Avg episode reward: [(0, '8.314')] +[2023-02-27 12:00:07,234][00394] Fps is (10 sec: 1638.6, 60 sec: 2389.4, 300 sec: 2638.1). Total num frames: 9535488. Throughput: 0: 622.0. Samples: 382956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:00:07,322][00394] Avg episode reward: [(0, '8.245')] +[2023-02-27 12:00:12,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2389.3, 300 sec: 2610.3). Total num frames: 9547776. Throughput: 0: 594.3. Samples: 386066. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 12:00:12,318][00394] Avg episode reward: [(0, '8.171')] +[2023-02-27 12:00:17,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2457.6, 300 sec: 2624.2). Total num frames: 9560064. Throughput: 0: 565.5. Samples: 387708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:00:17,309][00394] Avg episode reward: [(0, '7.626')] +[2023-02-27 12:00:18,127][37536] Signal inference workers to stop experience collection... (50 times) +[2023-02-27 12:00:18,163][37558] InferenceWorker_p0-w0: stopping experience collection (50 times) +[2023-02-27 12:00:18,246][37536] Signal inference workers to resume experience collection... (50 times) +[2023-02-27 12:00:18,247][37558] InferenceWorker_p0-w0: resuming experience collection (50 times) +[2023-02-27 12:00:18,289][37558] Updated weights for policy 0, policy_version 2335 (0.0195) +[2023-02-27 12:00:22,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2594.2, 300 sec: 2665.9). Total num frames: 9580544. Throughput: 0: 561.9. Samples: 392124. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0) +[2023-02-27 12:00:22,272][00394] Avg episode reward: [(0, '8.279')] +[2023-02-27 12:00:27,268][00394] Fps is (10 sec: 3275.3, 60 sec: 2457.4, 300 sec: 2679.7). Total num frames: 9592832. Throughput: 0: 679.0. Samples: 397678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:00:27,583][00394] Avg episode reward: [(0, '8.496')] +[2023-02-27 12:00:30,093][37536] Saving new best policy, reward=8.496! +[2023-02-27 12:00:32,233][00394] Fps is (10 sec: 1638.4, 60 sec: 2184.5, 300 sec: 2652.0). Total num frames: 9596928. Throughput: 0: 677.7. Samples: 398926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:00:32,365][00394] Avg episode reward: [(0, '8.810')] +[2023-02-27 12:00:37,238][00394] Fps is (10 sec: 819.6, 60 sec: 2184.5, 300 sec: 2596.4). Total num frames: 9601024. Throughput: 0: 609.3. Samples: 400068. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2023-02-27 12:00:37,344][00394] Avg episode reward: [(0, '8.768')] +[2023-02-27 12:00:38,715][37536] Saving new best policy, reward=8.810! +[2023-02-27 12:00:39,024][37558] Updated weights for policy 0, policy_version 2345 (0.0177) +[2023-02-27 12:00:42,230][00394] Fps is (10 sec: 819.2, 60 sec: 2184.6, 300 sec: 2540.9). Total num frames: 9605120. Throughput: 0: 480.3. Samples: 400204. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2023-02-27 12:00:42,271][00394] Avg episode reward: [(0, '8.897')] +[2023-02-27 12:00:42,274][37536] Saving new best policy, reward=8.897! +[2023-02-27 12:00:47,246][00394] Fps is (10 sec: 1638.4, 60 sec: 2252.9, 300 sec: 2554.8). Total num frames: 9617408. Throughput: 0: 490.0. Samples: 402092. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-27 12:00:47,317][00394] Avg episode reward: [(0, '9.288')] +[2023-02-27 12:00:47,389][37536] Saving new best policy, reward=9.288! +[2023-02-27 12:00:52,232][00394] Fps is (10 sec: 3276.8, 60 sec: 2252.8, 300 sec: 2568.7). Total num frames: 9637888. Throughput: 0: 523.0. Samples: 406490. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:00:52,265][00394] Avg episode reward: [(0, '9.842')] +[2023-02-27 12:00:52,269][37536] Saving new best policy, reward=9.842! +[2023-02-27 12:00:56,658][37558] Updated weights for policy 0, policy_version 2355 (0.0124) +[2023-02-27 12:00:57,272][00394] Fps is (10 sec: 2866.9, 60 sec: 2116.3, 300 sec: 2568.7). Total num frames: 9646080. Throughput: 0: 551.6. Samples: 410888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:00:57,651][00394] Avg episode reward: [(0, '9.967')] +[2023-02-27 12:01:01,797][37536] Saving new best policy, reward=9.967! +[2023-02-27 12:01:02,240][00394] Fps is (10 sec: 1228.5, 60 sec: 2047.9, 300 sec: 2527.0). Total num frames: 9650176. Throughput: 0: 525.9. Samples: 411376. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:01:02,446][00394] Avg episode reward: [(0, '10.146')] +[2023-02-27 12:01:02,467][37536] Saving new best policy, reward=10.146! +[2023-02-27 12:01:07,231][00394] Fps is (10 sec: 1228.8, 60 sec: 2048.0, 300 sec: 2485.4). Total num frames: 9658368. Throughput: 0: 468.1. Samples: 413188. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:01:07,318][00394] Avg episode reward: [(0, '10.575')] +[2023-02-27 12:01:07,364][37536] Saving new best policy, reward=10.575! +[2023-02-27 12:01:12,243][00394] Fps is (10 sec: 1638.8, 60 sec: 1979.7, 300 sec: 2457.6). Total num frames: 9666560. Throughput: 0: 415.2. Samples: 416362. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 12:01:12,401][00394] Avg episode reward: [(0, '10.165')] +[2023-02-27 12:01:17,259][00394] Fps is (10 sec: 2457.7, 60 sec: 2048.0, 300 sec: 2485.4). Total num frames: 9682944. Throughput: 0: 425.4. Samples: 418068. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-27 12:01:17,360][00394] Avg episode reward: [(0, '10.283')] +[2023-02-27 12:01:18,668][37558] Updated weights for policy 0, policy_version 2365 (0.0195) +[2023-02-27 12:01:22,237][00394] Fps is (10 sec: 3276.8, 60 sec: 1979.7, 300 sec: 2513.2). Total num frames: 9699328. Throughput: 0: 500.4. Samples: 422584. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) +[2023-02-27 12:01:22,294][00394] Avg episode reward: [(0, '10.206')] +[2023-02-27 12:01:27,285][00394] Fps is (10 sec: 2867.4, 60 sec: 1979.9, 300 sec: 2499.3). Total num frames: 9711616. Throughput: 0: 611.3. Samples: 427712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:01:27,497][00394] Avg episode reward: [(0, '10.040')] +[2023-02-27 12:01:32,237][00394] Fps is (10 sec: 1638.4, 60 sec: 1979.7, 300 sec: 2457.6). Total num frames: 9715712. Throughput: 0: 585.5. Samples: 428440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:01:32,348][00394] Avg episode reward: [(0, '10.124')] +[2023-02-27 12:01:36,533][37558] Updated weights for policy 0, policy_version 2375 (0.0141) +[2023-02-27 12:01:37,230][00394] Fps is (10 sec: 1638.4, 60 sec: 2116.3, 300 sec: 2415.9). Total num frames: 9728000. Throughput: 0: 536.4. Samples: 430626. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:01:37,252][00394] Avg episode reward: [(0, '10.079')] +[2023-02-27 12:01:42,233][00394] Fps is (10 sec: 2047.9, 60 sec: 2184.5, 300 sec: 2429.8). Total num frames: 9736192. Throughput: 0: 512.8. Samples: 433966. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:01:42,334][00394] Avg episode reward: [(0, '10.419')] +[2023-02-27 12:01:47,231][00394] Fps is (10 sec: 2867.2, 60 sec: 2321.1, 300 sec: 2471.5). Total num frames: 9756672. Throughput: 0: 547.7. Samples: 436020. Policy #0 lag: (min: 1.0, avg: 1.2, max: 3.0) +[2023-02-27 12:01:47,261][00394] Avg episode reward: [(0, '10.740')] +[2023-02-27 12:01:47,488][37536] Saving new best policy, reward=10.740! +[2023-02-27 12:01:50,831][37558] Updated weights for policy 0, policy_version 2385 (0.0119) +[2023-02-27 12:01:52,230][00394] Fps is (10 sec: 3686.7, 60 sec: 2252.8, 300 sec: 2485.4). Total num frames: 9773056. Throughput: 0: 621.3. Samples: 441144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:01:52,257][00394] Avg episode reward: [(0, '11.248')] +[2023-02-27 12:01:52,262][37536] Saving new best policy, reward=11.248! +[2023-02-27 12:01:57,243][00394] Fps is (10 sec: 2048.0, 60 sec: 2184.6, 300 sec: 2443.7). Total num frames: 9777152. Throughput: 0: 626.0. Samples: 444530. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:01:57,476][00394] Avg episode reward: [(0, '11.475')] +[2023-02-27 12:02:00,731][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002389_9785344.pth... +[2023-02-27 12:02:01,253][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002249_9211904.pth +[2023-02-27 12:02:01,261][37536] Saving new best policy, reward=11.475! +[2023-02-27 12:02:02,235][00394] Fps is (10 sec: 1228.6, 60 sec: 2252.8, 300 sec: 2402.0). Total num frames: 9785344. Throughput: 0: 597.8. Samples: 444970. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 12:02:02,405][00394] Avg episode reward: [(0, '11.799')] +[2023-02-27 12:02:04,246][37536] Saving new best policy, reward=11.799! +[2023-02-27 12:02:07,238][00394] Fps is (10 sec: 2048.0, 60 sec: 2321.1, 300 sec: 2388.2). Total num frames: 9797632. Throughput: 0: 552.9. Samples: 447464. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 12:02:07,290][00394] Avg episode reward: [(0, '11.905')] +[2023-02-27 12:02:07,363][37536] Saving new best policy, reward=11.905! +[2023-02-27 12:02:10,808][37558] Updated weights for policy 0, policy_version 2395 (0.0140) +[2023-02-27 12:02:12,234][00394] Fps is (10 sec: 2867.6, 60 sec: 2457.6, 300 sec: 2415.9). Total num frames: 9814016. Throughput: 0: 533.2. Samples: 451704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:02:12,260][00394] Avg episode reward: [(0, '12.516')] +[2023-02-27 12:02:12,262][37536] Saving new best policy, reward=12.516! +[2023-02-27 12:02:17,230][00394] Fps is (10 sec: 3686.4, 60 sec: 2525.9, 300 sec: 2443.7). Total num frames: 9834496. Throughput: 0: 576.1. Samples: 454366. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:02:17,256][00394] Avg episode reward: [(0, '13.247')] +[2023-02-27 12:02:17,273][37536] Saving new best policy, reward=13.247! +[2023-02-27 12:02:22,243][00394] Fps is (10 sec: 2867.2, 60 sec: 2389.3, 300 sec: 2429.8). Total num frames: 9842688. Throughput: 0: 649.1. Samples: 459834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:02:22,400][00394] Avg episode reward: [(0, '13.052')] +[2023-02-27 12:02:24,545][37558] Updated weights for policy 0, policy_version 2405 (0.0057) +[2023-02-27 12:02:27,249][00394] Fps is (10 sec: 2047.3, 60 sec: 2389.2, 300 sec: 2402.0). Total num frames: 9854976. Throughput: 0: 640.7. Samples: 462798. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 12:02:27,308][00394] Avg episode reward: [(0, '13.197')] +[2023-02-27 12:02:32,237][00394] Fps is (10 sec: 2457.6, 60 sec: 2525.9, 300 sec: 2374.3). Total num frames: 9867264. Throughput: 0: 627.8. Samples: 464270. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:02:32,336][00394] Avg episode reward: [(0, '14.223')] +[2023-02-27 12:02:32,346][37536] Saving new best policy, reward=14.223! +[2023-02-27 12:02:37,243][00394] Fps is (10 sec: 2458.4, 60 sec: 2525.9, 300 sec: 2388.2). Total num frames: 9879552. Throughput: 0: 590.0. Samples: 467694. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:02:37,284][00394] Avg episode reward: [(0, '14.761')] +[2023-02-27 12:02:37,378][37536] Saving new best policy, reward=14.761! +[2023-02-27 12:02:40,336][37558] Updated weights for policy 0, policy_version 2415 (0.0148) +[2023-02-27 12:02:42,230][00394] Fps is (10 sec: 3276.7, 60 sec: 2730.7, 300 sec: 2416.0). Total num frames: 9900032. Throughput: 0: 625.5. Samples: 472678. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:02:42,261][00394] Avg episode reward: [(0, '14.452')] +[2023-02-27 12:02:47,245][00394] Fps is (10 sec: 3686.4, 60 sec: 2662.4, 300 sec: 2443.7). Total num frames: 9916416. Throughput: 0: 683.9. Samples: 475746. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:02:47,390][00394] Avg episode reward: [(0, '15.438')] +[2023-02-27 12:02:47,527][37536] Saving new best policy, reward=15.438! +[2023-02-27 12:02:52,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2525.9, 300 sec: 2415.9). Total num frames: 9924608. Throughput: 0: 717.5. Samples: 479750. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:02:52,258][00394] Avg episode reward: [(0, '14.882')] +[2023-02-27 12:02:56,185][37558] Updated weights for policy 0, policy_version 2425 (0.0077) +[2023-02-27 12:02:57,302][00394] Fps is (10 sec: 1637.7, 60 sec: 2594.0, 300 sec: 2388.1). Total num frames: 9932800. Throughput: 0: 688.0. Samples: 482668. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 12:02:57,603][00394] Avg episode reward: [(0, '14.558')] +[2023-02-27 12:03:02,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2662.5, 300 sec: 2374.3). Total num frames: 9945088. Throughput: 0: 661.1. Samples: 484116. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0) +[2023-02-27 12:03:02,300][00394] Avg episode reward: [(0, '13.884')] +[2023-02-27 12:03:07,236][00394] Fps is (10 sec: 2458.6, 60 sec: 2662.4, 300 sec: 2388.2). Total num frames: 9957376. Throughput: 0: 626.5. Samples: 488028. Policy #0 lag: (min: 0.0, avg: 1.6, max: 2.0) +[2023-02-27 12:03:07,327][00394] Avg episode reward: [(0, '14.439')] +[2023-02-27 12:03:10,355][37558] Updated weights for policy 0, policy_version 2435 (0.0188) +[2023-02-27 12:03:12,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2730.7, 300 sec: 2429.8). Total num frames: 9977856. Throughput: 0: 673.5. Samples: 493102. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0) +[2023-02-27 12:03:12,252][00394] Avg episode reward: [(0, '14.500')] +[2023-02-27 12:03:17,230][00394] Fps is (10 sec: 3686.5, 60 sec: 2662.4, 300 sec: 2457.6). Total num frames: 9994240. Throughput: 0: 710.4. Samples: 496240. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-27 12:03:17,322][00394] Avg episode reward: [(0, '13.808')] +[2023-02-27 12:03:22,251][00394] Fps is (10 sec: 2457.6, 60 sec: 2662.4, 300 sec: 2443.7). Total num frames: 10002432. Throughput: 0: 711.6. Samples: 499716. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:03:22,452][00394] Avg episode reward: [(0, '14.568')] +[2023-02-27 12:03:27,080][37558] Updated weights for policy 0, policy_version 2445 (0.0139) +[2023-02-27 12:03:27,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2662.5, 300 sec: 2415.9). Total num frames: 10014720. Throughput: 0: 668.5. Samples: 502762. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:03:27,349][00394] Avg episode reward: [(0, '14.516')] +[2023-02-27 12:03:32,231][00394] Fps is (10 sec: 2457.6, 60 sec: 2662.4, 300 sec: 2402.1). Total num frames: 10027008. Throughput: 0: 636.8. Samples: 504404. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 12:03:32,277][00394] Avg episode reward: [(0, '15.343')] +[2023-02-27 12:03:37,233][00394] Fps is (10 sec: 2867.3, 60 sec: 2730.7, 300 sec: 2429.8). Total num frames: 10043392. Throughput: 0: 637.1. Samples: 508418. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:03:37,275][00394] Avg episode reward: [(0, '15.607')] +[2023-02-27 12:03:37,339][37536] Saving new best policy, reward=15.607! +[2023-02-27 12:03:40,064][37558] Updated weights for policy 0, policy_version 2455 (0.0139) +[2023-02-27 12:03:42,230][00394] Fps is (10 sec: 3686.4, 60 sec: 2730.7, 300 sec: 2471.5). Total num frames: 10063872. Throughput: 0: 691.5. Samples: 513784. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:03:42,254][00394] Avg episode reward: [(0, '15.320')] +[2023-02-27 12:03:47,244][00394] Fps is (10 sec: 2866.2, 60 sec: 2594.0, 300 sec: 2457.6). Total num frames: 10072064. Throughput: 0: 712.3. Samples: 516172. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:03:47,340][00394] Avg episode reward: [(0, '15.720')] +[2023-02-27 12:03:47,419][37536] Saving new best policy, reward=15.720! +[2023-02-27 12:03:52,250][00394] Fps is (10 sec: 1638.4, 60 sec: 2594.1, 300 sec: 2415.9). Total num frames: 10080256. Throughput: 0: 688.8. Samples: 519024. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:03:52,332][00394] Avg episode reward: [(0, '16.569')] +[2023-02-27 12:03:52,335][37536] Saving new best policy, reward=16.569! +[2023-02-27 12:03:57,236][00394] Fps is (10 sec: 2048.7, 60 sec: 2662.6, 300 sec: 2388.2). Total num frames: 10092544. Throughput: 0: 646.1. Samples: 522178. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:03:57,365][00394] Avg episode reward: [(0, '16.209')] +[2023-02-27 12:03:59,103][37558] Updated weights for policy 0, policy_version 2465 (0.0175) +[2023-02-27 12:03:59,103][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002465_10096640.pth... +[2023-02-27 12:03:59,619][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002325_9523200.pth +[2023-02-27 12:04:02,241][00394] Fps is (10 sec: 2047.2, 60 sec: 2594.0, 300 sec: 2402.0). Total num frames: 10100736. Throughput: 0: 611.7. Samples: 523768. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2023-02-27 12:04:02,334][00394] Avg episode reward: [(0, '16.357')] +[2023-02-27 12:04:07,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2798.9, 300 sec: 2443.7). Total num frames: 10125312. Throughput: 0: 625.7. Samples: 527874. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-27 12:04:07,287][00394] Avg episode reward: [(0, '16.120')] +[2023-02-27 12:04:10,249][37558] Updated weights for policy 0, policy_version 2475 (0.0086) +[2023-02-27 12:04:12,230][00394] Fps is (10 sec: 4507.1, 60 sec: 2798.9, 300 sec: 2485.4). Total num frames: 10145792. Throughput: 0: 700.2. Samples: 534272. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:04:12,237][00394] Avg episode reward: [(0, '15.801')] +[2023-02-27 12:04:17,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2594.1, 300 sec: 2457.6). Total num frames: 10149888. Throughput: 0: 709.7. Samples: 536340. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:04:17,300][00394] Avg episode reward: [(0, '15.357')] +[2023-02-27 12:04:22,261][00394] Fps is (10 sec: 819.2, 60 sec: 2525.9, 300 sec: 2402.1). Total num frames: 10153984. Throughput: 0: 637.5. Samples: 537106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:04:22,394][00394] Avg episode reward: [(0, '15.393')] +[2023-02-27 12:04:27,248][00394] Fps is (10 sec: 409.5, 60 sec: 2321.0, 300 sec: 2332.6). Total num frames: 10153984. Throughput: 0: 552.3. Samples: 538638. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:04:27,340][00394] Avg episode reward: [(0, '14.948')] +[2023-02-27 12:04:32,240][00394] Fps is (10 sec: 1638.4, 60 sec: 2389.3, 300 sec: 2374.3). Total num frames: 10170368. Throughput: 0: 539.1. Samples: 540432. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-27 12:04:32,377][00394] Avg episode reward: [(0, '15.998')] +[2023-02-27 12:04:33,995][37558] Updated weights for policy 0, policy_version 2485 (0.0191) +[2023-02-27 12:04:37,243][00394] Fps is (10 sec: 3686.8, 60 sec: 2457.6, 300 sec: 2429.8). Total num frames: 10190848. Throughput: 0: 573.8. Samples: 544844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:04:37,295][00394] Avg episode reward: [(0, '14.705')] +[2023-02-27 12:04:42,255][00394] Fps is (10 sec: 2457.6, 60 sec: 2184.5, 300 sec: 2416.0). Total num frames: 10194944. Throughput: 0: 599.2. Samples: 549142. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:04:42,403][00394] Avg episode reward: [(0, '14.615')] +[2023-02-27 12:04:47,336][00394] Fps is (10 sec: 812.4, 60 sec: 2113.4, 300 sec: 2359.7). Total num frames: 10199040. Throughput: 0: 568.3. Samples: 549388. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:04:47,476][00394] Avg episode reward: [(0, '14.615')] +[2023-02-27 12:04:52,258][00394] Fps is (10 sec: 819.2, 60 sec: 2048.0, 300 sec: 2318.8). Total num frames: 10203136. Throughput: 0: 480.3. Samples: 549488. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2023-02-27 12:04:52,415][00394] Avg episode reward: [(0, '15.000')] +[2023-02-27 12:04:56,881][37558] Updated weights for policy 0, policy_version 2495 (0.0100) +[2023-02-27 12:04:57,230][00394] Fps is (10 sec: 2065.3, 60 sec: 2116.3, 300 sec: 2346.5). Total num frames: 10219520. Throughput: 0: 424.0. Samples: 553350. Policy #0 lag: (min: 1.0, avg: 1.1, max: 3.0) +[2023-02-27 12:04:57,263][00394] Avg episode reward: [(0, '16.605')] +[2023-02-27 12:04:57,315][37536] Saving new best policy, reward=16.605! +[2023-02-27 12:05:02,230][00394] Fps is (10 sec: 3686.4, 60 sec: 2321.2, 300 sec: 2388.2). Total num frames: 10240000. Throughput: 0: 429.3. Samples: 555658. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:05:02,256][00394] Avg episode reward: [(0, '17.507')] +[2023-02-27 12:05:02,258][37536] Saving new best policy, reward=17.507! +[2023-02-27 12:05:07,269][00394] Fps is (10 sec: 2867.2, 60 sec: 2048.0, 300 sec: 2374.3). Total num frames: 10248192. Throughput: 0: 530.2. Samples: 560966. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:05:07,490][00394] Avg episode reward: [(0, '17.060')] +[2023-02-27 12:05:12,230][00394] Fps is (10 sec: 1638.4, 60 sec: 1843.2, 300 sec: 2360.4). Total num frames: 10256384. Throughput: 0: 540.9. Samples: 562980. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:05:12,287][00394] Avg episode reward: [(0, '16.625')] +[2023-02-27 12:05:14,759][37558] Updated weights for policy 0, policy_version 2505 (0.0128) +[2023-02-27 12:05:17,239][00394] Fps is (10 sec: 1228.8, 60 sec: 1843.2, 300 sec: 2304.9). Total num frames: 10260480. Throughput: 0: 536.5. Samples: 564576. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 12:05:17,319][00394] Avg episode reward: [(0, '16.303')] +[2023-02-27 12:05:22,242][00394] Fps is (10 sec: 1638.4, 60 sec: 1979.7, 300 sec: 2304.9). Total num frames: 10272768. Throughput: 0: 494.4. Samples: 567090. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2023-02-27 12:05:22,441][00394] Avg episode reward: [(0, '16.844')] +[2023-02-27 12:05:27,238][00394] Fps is (10 sec: 2867.2, 60 sec: 2252.8, 300 sec: 2346.5). Total num frames: 10289152. Throughput: 0: 482.7. Samples: 570864. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2023-02-27 12:05:27,295][00394] Avg episode reward: [(0, '16.600')] +[2023-02-27 12:05:31,385][37558] Updated weights for policy 0, policy_version 2515 (0.0203) +[2023-02-27 12:05:32,251][00394] Fps is (10 sec: 2867.1, 60 sec: 2184.5, 300 sec: 2374.3). Total num frames: 10301440. Throughput: 0: 529.7. Samples: 573180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:05:32,450][00394] Avg episode reward: [(0, '14.703')] +[2023-02-27 12:05:37,251][00394] Fps is (10 sec: 2045.2, 60 sec: 1979.3, 300 sec: 2388.1). Total num frames: 10309632. Throughput: 0: 600.4. Samples: 576516. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 12:05:37,343][00394] Avg episode reward: [(0, '14.857')] +[2023-02-27 12:05:42,233][00394] Fps is (10 sec: 1228.6, 60 sec: 1979.7, 300 sec: 2360.4). Total num frames: 10313728. Throughput: 0: 535.1. Samples: 577432. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 12:05:42,280][00394] Avg episode reward: [(0, '14.719')] +[2023-02-27 12:05:47,230][00394] Fps is (10 sec: 1230.5, 60 sec: 2050.9, 300 sec: 2318.8). Total num frames: 10321920. Throughput: 0: 518.0. Samples: 578966. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:05:47,344][00394] Avg episode reward: [(0, '15.214')] +[2023-02-27 12:05:52,288][00394] Fps is (10 sec: 1227.9, 60 sec: 2047.7, 300 sec: 2304.8). Total num frames: 10326016. Throughput: 0: 450.3. Samples: 581234. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:05:52,480][00394] Avg episode reward: [(0, '14.633')] +[2023-02-27 12:05:57,251][00394] Fps is (10 sec: 819.1, 60 sec: 1843.2, 300 sec: 2304.9). Total num frames: 10330112. Throughput: 0: 420.7. Samples: 581910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2023-02-27 12:05:57,392][00394] Avg episode reward: [(0, '14.383')] +[2023-02-27 12:05:57,708][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002523_10334208.pth... +[2023-02-27 12:05:58,062][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002389_9785344.pth +[2023-02-27 12:06:00,961][37558] Updated weights for policy 0, policy_version 2525 (0.0203) +[2023-02-27 12:06:02,244][00394] Fps is (10 sec: 1639.8, 60 sec: 1706.7, 300 sec: 2318.8). Total num frames: 10342400. Throughput: 0: 434.0. Samples: 584106. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 12:06:02,314][00394] Avg episode reward: [(0, '14.580')] +[2023-02-27 12:06:07,272][00394] Fps is (10 sec: 2867.4, 60 sec: 1843.2, 300 sec: 2346.5). Total num frames: 10358784. Throughput: 0: 468.8. Samples: 588186. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 12:06:07,528][00394] Avg episode reward: [(0, '13.426')] +[2023-02-27 12:06:12,231][00394] Fps is (10 sec: 2047.9, 60 sec: 1774.9, 300 sec: 2304.9). Total num frames: 10362880. Throughput: 0: 444.7. Samples: 590874. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-02-27 12:06:12,385][00394] Avg episode reward: [(0, '13.658')] +[2023-02-27 12:06:17,266][00394] Fps is (10 sec: 1637.3, 60 sec: 1911.2, 300 sec: 2290.9). Total num frames: 10375168. Throughput: 0: 416.4. Samples: 591920. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) +[2023-02-27 12:06:17,476][00394] Avg episode reward: [(0, '13.850')] +[2023-02-27 12:06:21,965][37558] Updated weights for policy 0, policy_version 2535 (0.0217) +[2023-02-27 12:06:22,235][00394] Fps is (10 sec: 2047.9, 60 sec: 1843.2, 300 sec: 2277.1). Total num frames: 10383360. Throughput: 0: 397.3. Samples: 594390. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 12:06:22,288][00394] Avg episode reward: [(0, '13.919')] +[2023-02-27 12:06:27,232][00394] Fps is (10 sec: 1639.6, 60 sec: 1706.7, 300 sec: 2291.0). Total num frames: 10391552. Throughput: 0: 437.0. Samples: 597098. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2023-02-27 12:06:27,346][00394] Avg episode reward: [(0, '14.468')] +[2023-02-27 12:06:32,231][00394] Fps is (10 sec: 2048.2, 60 sec: 1706.7, 300 sec: 2291.0). Total num frames: 10403840. Throughput: 0: 444.1. Samples: 598950. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2023-02-27 12:06:32,386][00394] Avg episode reward: [(0, '15.074')] +[2023-02-27 12:06:37,000][37536] Signal inference workers to stop experience collection... (100 times) +[2023-02-27 12:06:37,038][37558] InferenceWorker_p0-w0: stopping experience collection (100 times) +[2023-02-27 12:06:37,217][37536] Signal inference workers to resume experience collection... (100 times) +[2023-02-27 12:06:37,217][37558] InferenceWorker_p0-w0: resuming experience collection (100 times) +[2023-02-27 12:06:37,353][00394] Fps is (10 sec: 2832.5, 60 sec: 1839.8, 300 sec: 2317.8). Total num frames: 10420224. Throughput: 0: 495.5. Samples: 603586. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:06:37,407][00394] Avg episode reward: [(0, '16.229')] +[2023-02-27 12:06:39,286][37558] Updated weights for policy 0, policy_version 2545 (0.0243) +[2023-02-27 12:06:42,230][00394] Fps is (10 sec: 2457.6, 60 sec: 1911.5, 300 sec: 2277.1). Total num frames: 10428416. Throughput: 0: 547.6. Samples: 606552. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:06:42,451][00394] Avg episode reward: [(0, '17.067')] +[2023-02-27 12:06:47,234][00394] Fps is (10 sec: 1658.6, 60 sec: 1911.4, 300 sec: 2249.3). Total num frames: 10436608. Throughput: 0: 526.0. Samples: 607776. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2023-02-27 12:06:47,440][00394] Avg episode reward: [(0, '16.943')] +[2023-02-27 12:06:52,252][00394] Fps is (10 sec: 1228.8, 60 sec: 1911.7, 300 sec: 2249.3). Total num frames: 10440704. Throughput: 0: 478.4. Samples: 609712. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2023-02-27 12:06:52,359][00394] Avg episode reward: [(0, '17.516')] +[2023-02-27 12:06:54,590][37536] Saving new best policy, reward=17.516! +[2023-02-27 12:06:57,254][00394] Fps is (10 sec: 819.3, 60 sec: 1911.5, 300 sec: 2235.5). Total num frames: 10444800. Throughput: 0: 438.0. Samples: 610584. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) +[2023-02-27 12:06:57,366][00394] Avg episode reward: [(0, '17.310')] +[2023-02-27 12:07:01,855][37558] Updated weights for policy 0, policy_version 2555 (0.0217) +[2023-02-27 12:07:02,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2048.0, 300 sec: 2263.2). Total num frames: 10465280. Throughput: 0: 458.3. Samples: 612540. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-27 12:07:02,249][00394] Avg episode reward: [(0, '18.727')] +[2023-02-27 12:07:02,262][37536] Saving new best policy, reward=18.727! +[2023-02-27 12:07:07,232][00394] Fps is (10 sec: 4095.1, 60 sec: 2116.2, 300 sec: 2277.1). Total num frames: 10485760. Throughput: 0: 530.3. Samples: 618256. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:07:07,258][00394] Avg episode reward: [(0, '17.309')] +[2023-02-27 12:07:12,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2252.8, 300 sec: 2249.3). Total num frames: 10498048. Throughput: 0: 586.3. Samples: 623482. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:07:12,288][00394] Avg episode reward: [(0, '17.390')] +[2023-02-27 12:07:15,359][37558] Updated weights for policy 0, policy_version 2565 (0.0058) +[2023-02-27 12:07:17,276][00394] Fps is (10 sec: 2048.2, 60 sec: 2184.7, 300 sec: 2249.3). Total num frames: 10506240. Throughput: 0: 577.5. Samples: 624940. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:07:17,419][00394] Avg episode reward: [(0, '17.988')] +[2023-02-27 12:07:22,237][00394] Fps is (10 sec: 2048.0, 60 sec: 2252.8, 300 sec: 2249.4). Total num frames: 10518528. Throughput: 0: 543.1. Samples: 627960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:07:22,321][00394] Avg episode reward: [(0, '17.709')] +[2023-02-27 12:07:27,236][00394] Fps is (10 sec: 2457.9, 60 sec: 2321.1, 300 sec: 2249.3). Total num frames: 10530816. Throughput: 0: 557.8. Samples: 631654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2023-02-27 12:07:27,294][00394] Avg episode reward: [(0, '18.021')] +[2023-02-27 12:07:30,911][37558] Updated weights for policy 0, policy_version 2575 (0.0187) +[2023-02-27 12:07:32,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2457.6, 300 sec: 2277.1). Total num frames: 10551296. Throughput: 0: 578.9. Samples: 633826. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:07:32,253][00394] Avg episode reward: [(0, '17.359')] +[2023-02-27 12:07:37,230][00394] Fps is (10 sec: 4096.1, 60 sec: 2531.0, 300 sec: 2277.1). Total num frames: 10571776. Throughput: 0: 672.1. Samples: 639956. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:07:37,233][00394] Avg episode reward: [(0, '17.855')] +[2023-02-27 12:07:42,230][00394] Fps is (10 sec: 3276.7, 60 sec: 2594.1, 300 sec: 2263.2). Total num frames: 10584064. Throughput: 0: 764.8. Samples: 645000. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:07:42,299][00394] Avg episode reward: [(0, '17.475')] +[2023-02-27 12:07:43,880][37558] Updated weights for policy 0, policy_version 2585 (0.0061) +[2023-02-27 12:07:47,232][00394] Fps is (10 sec: 2457.6, 60 sec: 2662.5, 300 sec: 2277.1). Total num frames: 10596352. Throughput: 0: 759.0. Samples: 646694. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:07:47,285][00394] Avg episode reward: [(0, '16.812')] +[2023-02-27 12:07:52,261][00394] Fps is (10 sec: 2047.5, 60 sec: 2730.6, 300 sec: 2277.1). Total num frames: 10604544. Throughput: 0: 706.1. Samples: 650032. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:07:52,426][00394] Avg episode reward: [(0, '17.453')] +[2023-02-27 12:07:57,230][00394] Fps is (10 sec: 2867.2, 60 sec: 3003.7, 300 sec: 2304.9). Total num frames: 10625024. Throughput: 0: 675.6. Samples: 653886. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2023-02-27 12:07:57,257][00394] Avg episode reward: [(0, '16.878')] +[2023-02-27 12:07:57,339][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002594_10625024.pth... +[2023-02-27 12:07:57,679][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002465_10096640.pth +[2023-02-27 12:07:59,213][37558] Updated weights for policy 0, policy_version 2595 (0.0163) +[2023-02-27 12:08:02,230][00394] Fps is (10 sec: 3687.3, 60 sec: 2935.5, 300 sec: 2318.8). Total num frames: 10641408. Throughput: 0: 702.4. Samples: 656548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:08:02,263][00394] Avg episode reward: [(0, '17.431')] +[2023-02-27 12:08:07,237][00394] Fps is (10 sec: 3276.8, 60 sec: 2867.3, 300 sec: 2304.9). Total num frames: 10657792. Throughput: 0: 761.5. Samples: 662228. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:08:07,316][00394] Avg episode reward: [(0, '17.632')] +[2023-02-27 12:08:11,560][37558] Updated weights for policy 0, policy_version 2605 (0.0043) +[2023-02-27 12:08:12,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2867.2, 300 sec: 2291.0). Total num frames: 10670080. Throughput: 0: 760.9. Samples: 665892. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:08:12,277][00394] Avg episode reward: [(0, '17.761')] +[2023-02-27 12:08:17,247][00394] Fps is (10 sec: 2048.0, 60 sec: 2867.3, 300 sec: 2291.0). Total num frames: 10678272. Throughput: 0: 755.9. Samples: 667840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:08:17,406][00394] Avg episode reward: [(0, '18.309')] +[2023-02-27 12:08:22,237][00394] Fps is (10 sec: 2048.0, 60 sec: 2867.2, 300 sec: 2291.0). Total num frames: 10690560. Throughput: 0: 704.3. Samples: 671648. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:08:22,335][00394] Avg episode reward: [(0, '17.736')] +[2023-02-27 12:08:26,763][37558] Updated weights for policy 0, policy_version 2615 (0.0100) +[2023-02-27 12:08:27,230][00394] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 2318.8). Total num frames: 10711040. Throughput: 0: 688.1. Samples: 675966. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2023-02-27 12:08:27,252][00394] Avg episode reward: [(0, '17.348')] +[2023-02-27 12:08:32,230][00394] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 2332.6). Total num frames: 10731520. Throughput: 0: 720.8. Samples: 679128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:08:32,239][00394] Avg episode reward: [(0, '17.840')] +[2023-02-27 12:08:37,233][00394] Fps is (10 sec: 3275.9, 60 sec: 2867.1, 300 sec: 2304.8). Total num frames: 10743808. Throughput: 0: 764.7. Samples: 684444. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:08:37,320][00394] Avg episode reward: [(0, '17.914')] +[2023-02-27 12:08:39,723][37558] Updated weights for policy 0, policy_version 2625 (0.0048) +[2023-02-27 12:08:42,234][00394] Fps is (10 sec: 2456.5, 60 sec: 2867.0, 300 sec: 2318.7). Total num frames: 10756096. Throughput: 0: 752.6. Samples: 687758. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 12:08:42,291][00394] Avg episode reward: [(0, '17.345')] +[2023-02-27 12:08:47,246][00394] Fps is (10 sec: 2458.3, 60 sec: 2867.2, 300 sec: 2332.6). Total num frames: 10768384. Throughput: 0: 735.1. Samples: 689626. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:08:47,327][00394] Avg episode reward: [(0, '16.830')] +[2023-02-27 12:08:52,230][00394] Fps is (10 sec: 2868.5, 60 sec: 3003.9, 300 sec: 2346.5). Total num frames: 10784768. Throughput: 0: 700.1. Samples: 693734. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) +[2023-02-27 12:08:52,248][00394] Avg episode reward: [(0, '18.350')] +[2023-02-27 12:08:53,457][37558] Updated weights for policy 0, policy_version 2635 (0.0101) +[2023-02-27 12:08:57,230][00394] Fps is (10 sec: 4096.1, 60 sec: 3072.0, 300 sec: 2402.1). Total num frames: 10809344. Throughput: 0: 758.1. Samples: 700006. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:08:57,237][00394] Avg episode reward: [(0, '19.344')] +[2023-02-27 12:08:57,249][37536] Saving new best policy, reward=19.344! +[2023-02-27 12:09:02,244][00394] Fps is (10 sec: 3684.0, 60 sec: 3003.4, 300 sec: 2360.4). Total num frames: 10821632. Throughput: 0: 783.1. Samples: 703086. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:09:02,429][00394] Avg episode reward: [(0, '19.916')] +[2023-02-27 12:09:04,015][37536] Saving new best policy, reward=19.916! +[2023-02-27 12:09:07,052][37558] Updated weights for policy 0, policy_version 2645 (0.0054) +[2023-02-27 12:09:07,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2935.5, 300 sec: 2332.6). Total num frames: 10833920. Throughput: 0: 770.9. Samples: 706338. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:09:07,289][00394] Avg episode reward: [(0, '19.043')] +[2023-02-27 12:09:12,237][00394] Fps is (10 sec: 2459.1, 60 sec: 2935.4, 300 sec: 2360.4). Total num frames: 10846208. Throughput: 0: 752.4. Samples: 709824. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:09:12,324][00394] Avg episode reward: [(0, '19.273')] +[2023-02-27 12:09:17,234][00394] Fps is (10 sec: 2867.2, 60 sec: 3072.0, 300 sec: 2402.1). Total num frames: 10862592. Throughput: 0: 723.9. Samples: 711702. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:09:17,272][00394] Avg episode reward: [(0, '19.708')] +[2023-02-27 12:09:20,132][37558] Updated weights for policy 0, policy_version 2655 (0.0072) +[2023-02-27 12:09:22,230][00394] Fps is (10 sec: 3686.6, 60 sec: 3208.5, 300 sec: 2471.5). Total num frames: 10883072. Throughput: 0: 735.1. Samples: 717522. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:09:22,233][00394] Avg episode reward: [(0, '19.667')] +[2023-02-27 12:09:27,235][00394] Fps is (10 sec: 3275.1, 60 sec: 3071.7, 300 sec: 2457.6). Total num frames: 10895360. Throughput: 0: 787.5. Samples: 723196. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:09:27,517][00394] Avg episode reward: [(0, '18.430')] +[2023-02-27 12:09:32,232][00394] Fps is (10 sec: 2457.6, 60 sec: 2935.5, 300 sec: 2429.8). Total num frames: 10907648. Throughput: 0: 775.2. Samples: 724512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:09:32,267][00394] Avg episode reward: [(0, '18.702')] +[2023-02-27 12:09:35,710][37558] Updated weights for policy 0, policy_version 2665 (0.0043) +[2023-02-27 12:09:37,244][00394] Fps is (10 sec: 2049.0, 60 sec: 2867.3, 300 sec: 2443.7). Total num frames: 10915840. Throughput: 0: 761.6. Samples: 728006. Policy #0 lag: (min: 0.0, avg: 0.6, max: 3.0) +[2023-02-27 12:09:37,412][00394] Avg episode reward: [(0, '18.008')] +[2023-02-27 12:09:42,230][00394] Fps is (10 sec: 2867.2, 60 sec: 3004.0, 300 sec: 2500.0). Total num frames: 10936320. Throughput: 0: 702.9. Samples: 731636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:09:42,256][00394] Avg episode reward: [(0, '18.610')] +[2023-02-27 12:09:47,065][37558] Updated weights for policy 0, policy_version 2675 (0.0112) +[2023-02-27 12:09:47,230][00394] Fps is (10 sec: 4096.1, 60 sec: 3140.3, 300 sec: 2554.8). Total num frames: 10956800. Throughput: 0: 703.0. Samples: 734716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:09:47,233][00394] Avg episode reward: [(0, '19.662')] +[2023-02-27 12:09:52,235][00394] Fps is (10 sec: 3686.0, 60 sec: 3140.2, 300 sec: 2554.8). Total num frames: 10973184. Throughput: 0: 777.0. Samples: 741302. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:09:52,332][00394] Avg episode reward: [(0, '19.234')] +[2023-02-27 12:09:57,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 2513.1). Total num frames: 10981376. Throughput: 0: 771.5. Samples: 744542. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 12:09:57,265][00394] Avg episode reward: [(0, '20.313')] +[2023-02-27 12:09:58,628][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002682_10985472.pth... +[2023-02-27 12:09:59,229][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002523_10334208.pth +[2023-02-27 12:09:59,237][37536] Saving new best policy, reward=20.313! +[2023-02-27 12:10:02,236][00394] Fps is (10 sec: 1638.5, 60 sec: 2799.2, 300 sec: 2513.1). Total num frames: 10989568. Throughput: 0: 759.6. Samples: 745882. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:10:02,404][00394] Avg episode reward: [(0, '20.217')] +[2023-02-27 12:10:05,263][37558] Updated weights for policy 0, policy_version 2685 (0.0140) +[2023-02-27 12:10:07,237][00394] Fps is (10 sec: 2048.0, 60 sec: 2798.9, 300 sec: 2527.0). Total num frames: 11001856. Throughput: 0: 692.2. Samples: 748672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:10:07,295][00394] Avg episode reward: [(0, '20.475')] +[2023-02-27 12:10:07,374][37536] Saving new best policy, reward=20.475! +[2023-02-27 12:10:12,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2867.2, 300 sec: 2568.7). Total num frames: 11018240. Throughput: 0: 652.8. Samples: 752570. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:10:12,254][00394] Avg episode reward: [(0, '21.306')] +[2023-02-27 12:10:12,258][37536] Saving new best policy, reward=21.306! +[2023-02-27 12:10:17,139][37558] Updated weights for policy 0, policy_version 2695 (0.0051) +[2023-02-27 12:10:17,231][00394] Fps is (10 sec: 3686.3, 60 sec: 2935.5, 300 sec: 2596.4). Total num frames: 11038720. Throughput: 0: 681.8. Samples: 755192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:10:17,260][00394] Avg episode reward: [(0, '22.054')] +[2023-02-27 12:10:17,287][37536] Saving new best policy, reward=22.054! +[2023-02-27 12:10:22,231][00394] Fps is (10 sec: 2457.3, 60 sec: 2662.3, 300 sec: 2554.8). Total num frames: 11042816. Throughput: 0: 713.9. Samples: 760130. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:10:22,399][00394] Avg episode reward: [(0, '21.832')] +[2023-02-27 12:10:27,234][00394] Fps is (10 sec: 1638.1, 60 sec: 2662.5, 300 sec: 2554.8). Total num frames: 11055104. Throughput: 0: 695.3. Samples: 762924. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0) +[2023-02-27 12:10:27,266][00394] Avg episode reward: [(0, '22.201')] +[2023-02-27 12:10:27,361][37536] Saving new best policy, reward=22.201! +[2023-02-27 12:10:32,235][00394] Fps is (10 sec: 2047.2, 60 sec: 2593.9, 300 sec: 2554.9). Total num frames: 11063296. Throughput: 0: 658.6. Samples: 764356. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:10:32,402][00394] Avg episode reward: [(0, '21.700')] +[2023-02-27 12:10:37,244][00394] Fps is (10 sec: 2048.4, 60 sec: 2662.4, 300 sec: 2582.6). Total num frames: 11075584. Throughput: 0: 588.6. Samples: 767788. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 12:10:37,421][00394] Avg episode reward: [(0, '22.517')] +[2023-02-27 12:10:37,845][37536] Saving new best policy, reward=22.517! +[2023-02-27 12:10:37,848][37558] Updated weights for policy 0, policy_version 2705 (0.0213) +[2023-02-27 12:10:42,230][00394] Fps is (10 sec: 3278.5, 60 sec: 2662.4, 300 sec: 2624.2). Total num frames: 11096064. Throughput: 0: 607.6. Samples: 771882. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 12:10:42,255][00394] Avg episode reward: [(0, '21.448')] +[2023-02-27 12:10:47,230][00394] Fps is (10 sec: 4096.1, 60 sec: 2662.4, 300 sec: 2679.8). Total num frames: 11116544. Throughput: 0: 645.2. Samples: 774914. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:10:47,236][00394] Avg episode reward: [(0, '20.766')] +[2023-02-27 12:10:47,964][37558] Updated weights for policy 0, policy_version 2715 (0.0050) +[2023-02-27 12:10:52,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2594.2, 300 sec: 2707.5). Total num frames: 11128832. Throughput: 0: 708.7. Samples: 780562. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:10:52,287][00394] Avg episode reward: [(0, '20.186')] +[2023-02-27 12:10:57,255][00394] Fps is (10 sec: 2047.7, 60 sec: 2594.1, 300 sec: 2693.6). Total num frames: 11137024. Throughput: 0: 692.0. Samples: 783710. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:10:57,577][00394] Avg episode reward: [(0, '20.531')] +[2023-02-27 12:11:02,230][00394] Fps is (10 sec: 2457.6, 60 sec: 2730.7, 300 sec: 2693.6). Total num frames: 11153408. Throughput: 0: 664.2. Samples: 785080. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) +[2023-02-27 12:11:02,292][00394] Avg episode reward: [(0, '20.263')] +[2023-02-27 12:11:05,077][37558] Updated weights for policy 0, policy_version 2725 (0.0135) +[2023-02-27 12:11:07,230][00394] Fps is (10 sec: 3277.3, 60 sec: 2798.9, 300 sec: 2735.3). Total num frames: 11169792. Throughput: 0: 647.6. Samples: 789272. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 12:11:07,250][00394] Avg episode reward: [(0, '20.119')] +[2023-02-27 12:11:12,230][00394] Fps is (10 sec: 3686.4, 60 sec: 2867.2, 300 sec: 2763.1). Total num frames: 11190272. Throughput: 0: 729.7. Samples: 795758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:11:12,239][00394] Avg episode reward: [(0, '21.085')] +[2023-02-27 12:11:16,954][37558] Updated weights for policy 0, policy_version 2735 (0.0043) +[2023-02-27 12:11:17,231][00394] Fps is (10 sec: 3276.3, 60 sec: 2730.6, 300 sec: 2776.9). Total num frames: 11202560. Throughput: 0: 760.5. Samples: 798576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:11:17,291][00394] Avg episode reward: [(0, '21.750')] +[2023-02-27 12:11:22,245][00394] Fps is (10 sec: 2456.8, 60 sec: 2867.1, 300 sec: 2790.8). Total num frames: 11214848. Throughput: 0: 755.1. Samples: 801770. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:11:22,295][00394] Avg episode reward: [(0, '22.248')] +[2023-02-27 12:11:27,238][00394] Fps is (10 sec: 2048.3, 60 sec: 2799.0, 300 sec: 2776.9). Total num frames: 11223040. Throughput: 0: 737.6. Samples: 805074. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:11:27,339][00394] Avg episode reward: [(0, '22.463')] +[2023-02-27 12:11:32,231][00394] Fps is (10 sec: 2458.5, 60 sec: 2935.7, 300 sec: 2778.1). Total num frames: 11239424. Throughput: 0: 708.9. Samples: 806814. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-27 12:11:32,260][00394] Avg episode reward: [(0, '23.549')] +[2023-02-27 12:11:32,275][37536] Saving new best policy, reward=23.549! +[2023-02-27 12:11:33,843][37558] Updated weights for policy 0, policy_version 2745 (0.0112) +[2023-02-27 12:11:37,230][00394] Fps is (10 sec: 3276.9, 60 sec: 3003.7, 300 sec: 2804.7). Total num frames: 11255808. Throughput: 0: 686.0. Samples: 811430. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:11:37,257][00394] Avg episode reward: [(0, '23.536')] +[2023-02-27 12:11:42,235][00394] Fps is (10 sec: 3275.0, 60 sec: 2935.2, 300 sec: 2832.4). Total num frames: 11272192. Throughput: 0: 740.7. Samples: 817044. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:11:42,279][00394] Avg episode reward: [(0, '23.766')] +[2023-02-27 12:11:42,288][37536] Saving new best policy, reward=23.766! +[2023-02-27 12:11:46,853][37558] Updated weights for policy 0, policy_version 2755 (0.0061) +[2023-02-27 12:11:47,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2798.9, 300 sec: 2860.3). Total num frames: 11284480. Throughput: 0: 745.7. Samples: 818636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:11:47,261][00394] Avg episode reward: [(0, '23.584')] +[2023-02-27 12:11:52,238][00394] Fps is (10 sec: 2049.1, 60 sec: 2730.7, 300 sec: 2874.1). Total num frames: 11292672. Throughput: 0: 726.0. Samples: 821940. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:11:52,328][00394] Avg episode reward: [(0, '22.751')] +[2023-02-27 12:11:57,232][00394] Fps is (10 sec: 2048.0, 60 sec: 2799.0, 300 sec: 2846.4). Total num frames: 11304960. Throughput: 0: 663.1. Samples: 825596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:11:57,321][00394] Avg episode reward: [(0, '23.142')] +[2023-02-27 12:11:57,890][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002761_11309056.pth... +[2023-02-27 12:11:58,228][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002594_10625024.pth +[2023-02-27 12:12:01,596][37558] Updated weights for policy 0, policy_version 2765 (0.0155) +[2023-02-27 12:12:02,230][00394] Fps is (10 sec: 3276.7, 60 sec: 2867.2, 300 sec: 2846.4). Total num frames: 11325440. Throughput: 0: 648.6. Samples: 827764. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 12:12:02,250][00394] Avg episode reward: [(0, '22.490')] +[2023-02-27 12:12:07,232][00394] Fps is (10 sec: 4096.0, 60 sec: 2935.5, 300 sec: 2874.1). Total num frames: 11345920. Throughput: 0: 712.1. Samples: 833810. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:12:07,272][00394] Avg episode reward: [(0, '21.696')] +[2023-02-27 12:12:12,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2730.7, 300 sec: 2874.2). Total num frames: 11354112. Throughput: 0: 721.5. Samples: 837540. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:12:12,262][00394] Avg episode reward: [(0, '21.019')] +[2023-02-27 12:12:17,217][37558] Updated weights for policy 0, policy_version 2775 (0.0123) +[2023-02-27 12:12:17,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2730.7, 300 sec: 2874.1). Total num frames: 11366400. Throughput: 0: 718.6. Samples: 839152. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:12:17,344][00394] Avg episode reward: [(0, '20.491')] +[2023-02-27 12:12:22,230][00394] Fps is (10 sec: 2457.7, 60 sec: 2730.8, 300 sec: 2874.1). Total num frames: 11378688. Throughput: 0: 692.0. Samples: 842572. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:12:22,277][00394] Avg episode reward: [(0, '21.273')] +[2023-02-27 12:12:27,230][00394] Fps is (10 sec: 3276.7, 60 sec: 2935.5, 300 sec: 2874.1). Total num frames: 11399168. Throughput: 0: 677.9. Samples: 847544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:12:27,244][00394] Avg episode reward: [(0, '21.987')] +[2023-02-27 12:12:29,017][37558] Updated weights for policy 0, policy_version 2785 (0.0040) +[2023-02-27 12:12:32,230][00394] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 2874.1). Total num frames: 11419648. Throughput: 0: 715.3. Samples: 850826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:12:32,232][00394] Avg episode reward: [(0, '21.452')] +[2023-02-27 12:12:37,230][00394] Fps is (10 sec: 2867.1, 60 sec: 2867.2, 300 sec: 2860.3). Total num frames: 11427840. Throughput: 0: 755.7. Samples: 855948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:12:37,359][00394] Avg episode reward: [(0, '21.622')] +[2023-02-27 12:12:42,231][00394] Fps is (10 sec: 2047.9, 60 sec: 2799.2, 300 sec: 2860.3). Total num frames: 11440128. Throughput: 0: 742.9. Samples: 859026. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 12:12:42,336][00394] Avg episode reward: [(0, '22.700')] +[2023-02-27 12:12:46,008][37558] Updated weights for policy 0, policy_version 2795 (0.0102) +[2023-02-27 12:12:47,253][00394] Fps is (10 sec: 2047.9, 60 sec: 2730.6, 300 sec: 2860.3). Total num frames: 11448320. Throughput: 0: 726.5. Samples: 860458. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:12:47,380][00394] Avg episode reward: [(0, '23.265')] +[2023-02-27 12:12:52,232][00394] Fps is (10 sec: 2867.4, 60 sec: 2935.5, 300 sec: 2860.3). Total num frames: 11468800. Throughput: 0: 669.6. Samples: 863940. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2023-02-27 12:12:52,258][00394] Avg episode reward: [(0, '24.148')] +[2023-02-27 12:12:52,261][37536] Saving new best policy, reward=24.148! +[2023-02-27 12:12:57,231][00394] Fps is (10 sec: 3277.0, 60 sec: 2935.5, 300 sec: 2846.4). Total num frames: 11481088. Throughput: 0: 695.8. Samples: 868850. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:12:57,265][00394] Avg episode reward: [(0, '24.112')] +[2023-02-27 12:12:58,933][37558] Updated weights for policy 0, policy_version 2805 (0.0067) +[2023-02-27 12:13:02,241][00394] Fps is (10 sec: 2457.5, 60 sec: 2798.9, 300 sec: 2832.5). Total num frames: 11493376. Throughput: 0: 716.6. Samples: 871400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:13:02,347][00394] Avg episode reward: [(0, '23.611')] +[2023-02-27 12:13:07,237][00394] Fps is (10 sec: 2457.6, 60 sec: 2662.4, 300 sec: 2832.5). Total num frames: 11505664. Throughput: 0: 715.3. Samples: 874762. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 12:13:07,292][00394] Avg episode reward: [(0, '25.169')] +[2023-02-27 12:13:07,370][37536] Saving new best policy, reward=25.169! +[2023-02-27 12:13:12,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2662.4, 300 sec: 2832.5). Total num frames: 11513856. Throughput: 0: 668.5. Samples: 877628. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 12:13:12,319][00394] Avg episode reward: [(0, '24.543')] +[2023-02-27 12:13:17,243][00394] Fps is (10 sec: 2048.0, 60 sec: 2662.4, 300 sec: 2832.5). Total num frames: 11526144. Throughput: 0: 632.4. Samples: 879286. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:13:17,293][00394] Avg episode reward: [(0, '24.189')] +[2023-02-27 12:13:18,746][37558] Updated weights for policy 0, policy_version 2815 (0.0221) +[2023-02-27 12:13:22,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2730.7, 300 sec: 2818.6). Total num frames: 11542528. Throughput: 0: 600.2. Samples: 882958. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-27 12:13:22,260][00394] Avg episode reward: [(0, '23.625')] +[2023-02-27 12:13:27,230][00394] Fps is (10 sec: 3276.8, 60 sec: 2662.4, 300 sec: 2804.7). Total num frames: 11558912. Throughput: 0: 651.5. Samples: 888344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:13:27,236][00394] Avg episode reward: [(0, '22.528')] +[2023-02-27 12:13:32,242][00394] Fps is (10 sec: 2456.0, 60 sec: 2457.3, 300 sec: 2790.8). Total num frames: 11567104. Throughput: 0: 664.5. Samples: 890364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:13:32,372][00394] Avg episode reward: [(0, '22.932')] +[2023-02-27 12:13:33,528][37558] Updated weights for policy 0, policy_version 2825 (0.0043) +[2023-02-27 12:13:37,243][00394] Fps is (10 sec: 2046.5, 60 sec: 2525.6, 300 sec: 2790.8). Total num frames: 11579392. Throughput: 0: 645.3. Samples: 892982. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:13:37,289][00394] Avg episode reward: [(0, '23.593')] +[2023-02-27 12:13:42,231][00394] Fps is (10 sec: 2049.1, 60 sec: 2457.6, 300 sec: 2776.9). Total num frames: 11587584. Throughput: 0: 607.8. Samples: 896202. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:13:42,304][00394] Avg episode reward: [(0, '22.710')] +[2023-02-27 12:13:47,230][00394] Fps is (10 sec: 2869.3, 60 sec: 2662.4, 300 sec: 2790.8). Total num frames: 11608064. Throughput: 0: 588.6. Samples: 897888. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:13:47,260][00394] Avg episode reward: [(0, '22.285')] +[2023-02-27 12:13:48,280][37558] Updated weights for policy 0, policy_version 2835 (0.0089) +[2023-02-27 12:13:52,230][00394] Fps is (10 sec: 4096.4, 60 sec: 2662.4, 300 sec: 2776.9). Total num frames: 11628544. Throughput: 0: 647.2. Samples: 903888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:13:52,232][00394] Avg episode reward: [(0, '22.378')] +[2023-02-27 12:13:57,237][00394] Fps is (10 sec: 2865.1, 60 sec: 2593.8, 300 sec: 2763.1). Total num frames: 11636736. Throughput: 0: 700.9. Samples: 909174. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:13:57,329][00394] Avg episode reward: [(0, '22.876')] +[2023-02-27 12:13:58,221][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002842_11640832.pth... +[2023-02-27 12:13:58,669][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002682_10985472.pth +[2023-02-27 12:14:02,230][00394] Fps is (10 sec: 2048.0, 60 sec: 2594.1, 300 sec: 2763.1). Total num frames: 11649024. Throughput: 0: 687.6. Samples: 910228. Policy #0 lag: (min: 1.0, avg: 1.0, max: 2.0) +[2023-02-27 12:14:02,302][00394] Avg episode reward: [(0, '22.813')] +[2023-02-27 12:14:04,280][37558] Updated weights for policy 0, policy_version 2845 (0.0074) +[2023-02-27 12:14:07,236][00394] Fps is (10 sec: 2049.4, 60 sec: 2525.9, 300 sec: 2749.2). Total num frames: 11657216. Throughput: 0: 676.0. Samples: 913380. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-27 12:14:07,293][00394] Avg episode reward: [(0, '22.379')] +[2023-02-27 12:14:12,230][00394] Fps is (10 sec: 2867.2, 60 sec: 2730.7, 300 sec: 2763.1). Total num frames: 11677696. Throughput: 0: 648.5. Samples: 917528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:14:12,254][00394] Avg episode reward: [(0, '22.108')] +[2023-02-27 12:14:15,729][37558] Updated weights for policy 0, policy_version 2855 (0.0057) +[2023-02-27 12:14:17,230][00394] Fps is (10 sec: 4096.1, 60 sec: 2867.2, 300 sec: 2763.1). Total num frames: 11698176. Throughput: 0: 672.5. Samples: 920624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:14:17,234][00394] Avg episode reward: [(0, '23.253')] +[2023-02-27 12:14:22,232][00394] Fps is (10 sec: 3685.6, 60 sec: 2867.1, 300 sec: 2777.0). Total num frames: 11714560. Throughput: 0: 755.5. Samples: 926974. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:14:22,272][00394] Avg episode reward: [(0, '21.881')] +[2023-02-27 12:14:27,243][00394] Fps is (10 sec: 2457.6, 60 sec: 2730.7, 300 sec: 2763.1). Total num frames: 11722752. Throughput: 0: 753.6. Samples: 930112. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:14:27,354][00394] Avg episode reward: [(0, '22.076')] +[2023-02-27 12:14:32,239][37558] Updated weights for policy 0, policy_version 2865 (0.0133) +[2023-02-27 12:14:32,230][00394] Fps is (10 sec: 1638.7, 60 sec: 2730.9, 300 sec: 2763.1). Total num frames: 11730944. Throughput: 0: 748.7. Samples: 931580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:14:32,345][00394] Avg episode reward: [(0, '22.293')] +[2023-02-27 12:14:37,241][00394] Fps is (10 sec: 2457.6, 60 sec: 2799.3, 300 sec: 2749.2). Total num frames: 11747328. Throughput: 0: 685.7. Samples: 934744. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-27 12:14:37,305][00394] Avg episode reward: [(0, '22.710')] +[2023-02-27 12:14:42,230][00394] Fps is (10 sec: 3686.6, 60 sec: 3003.8, 300 sec: 2749.2). Total num frames: 11767808. Throughput: 0: 678.6. Samples: 939708. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-27 12:14:42,256][00394] Avg episode reward: [(0, '21.924')] +[2023-02-27 12:14:44,122][37558] Updated weights for policy 0, policy_version 2875 (0.0078) +[2023-02-27 12:14:47,230][00394] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 2763.1). Total num frames: 11788288. Throughput: 0: 725.4. Samples: 942872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:14:47,232][00394] Avg episode reward: [(0, '21.549')] +[2023-02-27 12:14:52,253][00394] Fps is (10 sec: 2864.2, 60 sec: 2798.5, 300 sec: 2763.0). Total num frames: 11796480. Throughput: 0: 769.0. Samples: 947992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:14:52,389][00394] Avg episode reward: [(0, '22.595')] +[2023-02-27 12:14:57,246][00394] Fps is (10 sec: 2047.1, 60 sec: 2867.3, 300 sec: 2776.9). Total num frames: 11808768. Throughput: 0: 746.7. Samples: 951132. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2023-02-27 12:14:57,360][00394] Avg episode reward: [(0, '22.630')] +[2023-02-27 12:15:00,340][37558] Updated weights for policy 0, policy_version 2885 (0.0085) +[2023-02-27 12:15:02,239][00394] Fps is (10 sec: 2049.8, 60 sec: 2798.9, 300 sec: 2763.0). Total num frames: 11816960. Throughput: 0: 714.1. Samples: 952758. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-27 12:15:02,345][00394] Avg episode reward: [(0, '22.280')] +[2023-02-27 12:15:07,234][00394] Fps is (10 sec: 2868.5, 60 sec: 3003.8, 300 sec: 2776.9). Total num frames: 11837440. Throughput: 0: 653.3. Samples: 956370. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-27 12:15:07,272][00394] Avg episode reward: [(0, '23.154')] +[2023-02-27 12:15:11,839][37558] Updated weights for policy 0, policy_version 2895 (0.0096) +[2023-02-27 12:15:12,230][00394] Fps is (10 sec: 4096.8, 60 sec: 3003.7, 300 sec: 2777.0). Total num frames: 11857920. Throughput: 0: 721.1. Samples: 962560. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:15:12,232][00394] Avg episode reward: [(0, '24.079')] +[2023-02-27 12:15:17,233][00394] Fps is (10 sec: 3275.7, 60 sec: 2867.0, 300 sec: 2804.7). Total num frames: 11870208. Throughput: 0: 762.8. Samples: 965910. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:15:17,343][00394] Avg episode reward: [(0, '23.756')] +[2023-02-27 12:15:22,232][00394] Fps is (10 sec: 2866.6, 60 sec: 2867.2, 300 sec: 2818.6). Total num frames: 11886592. Throughput: 0: 762.5. Samples: 969058. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-27 12:15:22,284][00394] Avg episode reward: [(0, '22.848')] +[2023-02-27 12:15:27,230][00394] Fps is (10 sec: 2458.4, 60 sec: 2867.2, 300 sec: 2818.7). Total num frames: 11894784. Throughput: 0: 732.0. Samples: 972650. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:15:27,282][00394] Avg episode reward: [(0, '21.104')] +[2023-02-27 12:15:29,409][37558] Updated weights for policy 0, policy_version 2905 (0.0107) +[2023-02-27 12:15:32,230][00394] Fps is (10 sec: 2458.1, 60 sec: 3003.8, 300 sec: 2832.5). Total num frames: 11911168. Throughput: 0: 701.0. Samples: 974416. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2023-02-27 12:15:32,261][00394] Avg episode reward: [(0, '21.612')] +[2023-02-27 12:15:37,230][00394] Fps is (10 sec: 3686.4, 60 sec: 3072.0, 300 sec: 2832.5). Total num frames: 11931648. Throughput: 0: 702.6. Samples: 979602. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:15:37,238][00394] Avg episode reward: [(0, '20.428')] +[2023-02-27 12:15:38,923][37558] Updated weights for policy 0, policy_version 2915 (0.0036) +[2023-02-27 12:15:42,251][00394] Fps is (10 sec: 3686.4, 60 sec: 3003.7, 300 sec: 2818.6). Total num frames: 11948032. Throughput: 0: 776.9. Samples: 986088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:15:42,400][00394] Avg episode reward: [(0, '19.594')] +[2023-02-27 12:15:47,239][00394] Fps is (10 sec: 2457.6, 60 sec: 2798.9, 300 sec: 2804.7). Total num frames: 11956224. Throughput: 0: 773.4. Samples: 987558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:15:47,341][00394] Avg episode reward: [(0, '20.279')] +[2023-02-27 12:15:52,250][00394] Fps is (10 sec: 1638.2, 60 sec: 2799.4, 300 sec: 2804.7). Total num frames: 11964416. Throughput: 0: 753.7. Samples: 990286. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-27 12:15:52,455][00394] Avg episode reward: [(0, '20.935')] +[2023-02-27 12:15:57,249][00394] Fps is (10 sec: 2047.5, 60 sec: 2799.0, 300 sec: 2790.8). Total num frames: 11976704. Throughput: 0: 692.0. Samples: 993700. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-27 12:15:57,425][00394] Avg episode reward: [(0, '20.689')] +[2023-02-27 12:15:57,648][37558] Updated weights for policy 0, policy_version 2925 (0.0175) +[2023-02-27 12:15:57,647][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002925_11980800.pth... +[2023-02-27 12:15:58,077][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002761_11309056.pth +[2023-02-27 12:16:02,230][00394] Fps is (10 sec: 2867.5, 60 sec: 2935.6, 300 sec: 2790.8). Total num frames: 11993088. Throughput: 0: 658.8. Samples: 995552. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-27 12:16:02,267][00394] Avg episode reward: [(0, '22.434')] +[2023-02-27 12:16:04,345][37536] Stopping Batcher_0... +[2023-02-27 12:16:04,346][37536] Loop batcher_evt_loop terminating... +[2023-02-27 12:16:04,352][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth... +[2023-02-27 12:16:04,424][37558] Weights refcount: 2 0 +[2023-02-27 12:16:04,437][00394] Component Batcher_0 stopped! +[2023-02-27 12:16:04,451][37558] Stopping InferenceWorker_p0-w0... +[2023-02-27 12:16:04,451][37558] Loop inference_proc0-0_evt_loop terminating... +[2023-02-27 12:16:04,451][00394] Component InferenceWorker_p0-w0 stopped! +[2023-02-27 12:16:04,648][37536] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002842_11640832.pth +[2023-02-27 12:16:04,651][37536] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth... +[2023-02-27 12:16:04,811][37536] Stopping LearnerWorker_p0... +[2023-02-27 12:16:04,812][00394] Component LearnerWorker_p0 stopped! +[2023-02-27 12:16:04,815][37536] Loop learner_proc0_evt_loop terminating... +[2023-02-27 12:16:04,850][00394] Component RolloutWorker_w1 stopped! +[2023-02-27 12:16:04,853][37555] Stopping RolloutWorker_w1... +[2023-02-27 12:16:04,857][37555] Loop rollout_proc1_evt_loop terminating... +[2023-02-27 12:16:04,860][00394] Component RolloutWorker_w5 stopped! +[2023-02-27 12:16:04,863][00394] Component RolloutWorker_w0 stopped! +[2023-02-27 12:16:04,866][00394] Component RolloutWorker_w3 stopped! +[2023-02-27 12:16:04,864][37569] Stopping RolloutWorker_w5... +[2023-02-27 12:16:04,866][37556] Stopping RolloutWorker_w3... +[2023-02-27 12:16:04,870][37556] Loop rollout_proc3_evt_loop terminating... +[2023-02-27 12:16:04,870][00394] Component RolloutWorker_w6 stopped! +[2023-02-27 12:16:04,872][37569] Loop rollout_proc5_evt_loop terminating... +[2023-02-27 12:16:04,874][37576] Stopping RolloutWorker_w7... +[2023-02-27 12:16:04,875][37576] Loop rollout_proc7_evt_loop terminating... +[2023-02-27 12:16:04,874][00394] Component RolloutWorker_w7 stopped! +[2023-02-27 12:16:04,888][37566] Stopping RolloutWorker_w2... +[2023-02-27 12:16:04,886][00394] Component RolloutWorker_w4 stopped! +[2023-02-27 12:16:04,893][00394] Component RolloutWorker_w2 stopped! +[2023-02-27 12:16:04,897][00394] Waiting for process learner_proc0 to stop... +[2023-02-27 12:16:04,873][37577] Stopping RolloutWorker_w6... +[2023-02-27 12:16:04,901][37577] Loop rollout_proc6_evt_loop terminating... +[2023-02-27 12:16:04,886][37564] Stopping RolloutWorker_w4... +[2023-02-27 12:16:04,909][37564] Loop rollout_proc4_evt_loop terminating... +[2023-02-27 12:16:04,861][37554] Stopping RolloutWorker_w0... +[2023-02-27 12:16:04,910][37554] Loop rollout_proc0_evt_loop terminating... +[2023-02-27 12:16:04,889][37566] Loop rollout_proc2_evt_loop terminating... +[2023-02-27 12:16:08,387][00394] Waiting for process inference_proc0-0 to join... +[2023-02-27 12:16:08,389][00394] Waiting for process rollout_proc0 to join... +[2023-02-27 12:16:08,393][00394] Waiting for process rollout_proc1 to join... +[2023-02-27 12:16:08,401][00394] Waiting for process rollout_proc2 to join... +[2023-02-27 12:16:08,402][00394] Waiting for process rollout_proc3 to join... +[2023-02-27 12:16:08,403][00394] Waiting for process rollout_proc4 to join... +[2023-02-27 12:16:08,408][00394] Waiting for process rollout_proc5 to join... +[2023-02-27 12:16:08,413][00394] Waiting for process rollout_proc6 to join... +[2023-02-27 12:16:08,415][00394] Waiting for process rollout_proc7 to join... +[2023-02-27 12:16:08,417][00394] Batcher 0 profile tree view: +batching: 42.4838, releasing_batches: 0.1596 +[2023-02-27 12:16:08,420][00394] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0048 + wait_policy_total: 440.8138 +update_model: 25.7197 + weight_update: 0.0126 +one_step: 0.0091 + handle_policy_step: 940.2608 + deserialize: 28.1715, stack: 6.2853, obs_to_device_normalize: 193.5817, forward: 497.4783, send_messages: 49.5014 + prepare_outputs: 118.2842 + to_cpu: 66.6327 +[2023-02-27 12:16:08,422][00394] Learner 0 profile tree view: +misc: 0.0056, prepare_batch: 256.8947 +train: 593.2102 + epoch_init: 0.0087, minibatch_init: 0.0101, losses_postprocess: 1.9653, kl_divergence: 5.0174, after_optimizer: 26.6448 + calculate_losses: 239.1089 + losses_init: 0.0090, forward_head: 26.4547, bptt_initial: 125.7768, tail: 22.5763, advantages_returns: 7.1498, losses: 48.1635 + bptt: 8.2512 + bptt_forward_core: 8.1628 + update: 295.4818 + clip: 27.4529 +[2023-02-27 12:16:08,424][00394] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.5821, enqueue_policy_requests: 138.7012, env_step: 934.1600, overhead: 66.7653, complete_rollouts: 8.0681 +save_policy_outputs: 52.9794 + split_output_tensors: 25.8993 +[2023-02-27 12:16:08,426][00394] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.8920, enqueue_policy_requests: 149.2018, env_step: 940.7048, overhead: 67.3373, complete_rollouts: 5.6190 +save_policy_outputs: 55.9852 + split_output_tensors: 26.3858 +[2023-02-27 12:16:08,434][00394] Loop Runner_EvtLoop terminating... +[2023-02-27 12:16:08,438][00394] Runner profile tree view: +main_loop: 1567.2632 +[2023-02-27 12:16:08,439][00394] Collected {0: 12005376}, FPS: 2550.7 +[2023-02-27 12:16:08,544][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 12:16:08,546][00394] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-27 12:16:08,548][00394] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-27 12:16:08,552][00394] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-27 12:16:08,553][00394] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:16:08,556][00394] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-27 12:16:08,558][00394] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:16:08,563][00394] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-27 12:16:08,564][00394] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-27 12:16:08,566][00394] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-27 12:16:08,568][00394] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-27 12:16:08,570][00394] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-27 12:16:08,571][00394] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-27 12:16:08,573][00394] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-27 12:16:08,574][00394] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-27 12:16:08,615][00394] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:16:08,626][00394] RunningMeanStd input shape: (1,) +[2023-02-27 12:16:08,659][00394] ConvEncoder: input_channels=3 +[2023-02-27 12:16:08,810][00394] Conv encoder output size: 512 +[2023-02-27 12:16:08,813][00394] Policy head output size: 512 +[2023-02-27 12:16:08,926][00394] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth... +[2023-02-27 12:16:10,405][00394] Num frames 100... +[2023-02-27 12:16:10,577][00394] Num frames 200... +[2023-02-27 12:16:10,760][00394] Num frames 300... +[2023-02-27 12:16:10,947][00394] Num frames 400... +[2023-02-27 12:16:11,132][00394] Num frames 500... +[2023-02-27 12:16:11,309][00394] Num frames 600... +[2023-02-27 12:16:11,492][00394] Num frames 700... +[2023-02-27 12:16:11,675][00394] Num frames 800... +[2023-02-27 12:16:11,847][00394] Num frames 900... +[2023-02-27 12:16:12,028][00394] Num frames 1000... +[2023-02-27 12:16:12,203][00394] Num frames 1100... +[2023-02-27 12:16:12,387][00394] Num frames 1200... +[2023-02-27 12:16:12,547][00394] Num frames 1300... +[2023-02-27 12:16:12,681][00394] Avg episode rewards: #0: 27.440, true rewards: #0: 13.440 +[2023-02-27 12:16:12,683][00394] Avg episode reward: 27.440, avg true_objective: 13.440 +[2023-02-27 12:16:12,776][00394] Num frames 1400... +[2023-02-27 12:16:12,936][00394] Num frames 1500... +[2023-02-27 12:16:13,103][00394] Num frames 1600... +[2023-02-27 12:16:13,262][00394] Num frames 1700... +[2023-02-27 12:16:13,437][00394] Num frames 1800... +[2023-02-27 12:16:13,601][00394] Num frames 1900... +[2023-02-27 12:16:13,763][00394] Num frames 2000... +[2023-02-27 12:16:13,925][00394] Num frames 2100... +[2023-02-27 12:16:14,095][00394] Num frames 2200... +[2023-02-27 12:16:14,271][00394] Num frames 2300... +[2023-02-27 12:16:14,476][00394] Avg episode rewards: #0: 26.405, true rewards: #0: 11.905 +[2023-02-27 12:16:14,479][00394] Avg episode reward: 26.405, avg true_objective: 11.905 +[2023-02-27 12:16:14,512][00394] Num frames 2400... +[2023-02-27 12:16:14,680][00394] Num frames 2500... +[2023-02-27 12:16:14,819][00394] Num frames 2600... +[2023-02-27 12:16:14,954][00394] Num frames 2700... +[2023-02-27 12:16:15,080][00394] Num frames 2800... +[2023-02-27 12:16:15,210][00394] Num frames 2900... +[2023-02-27 12:16:15,331][00394] Num frames 3000... +[2023-02-27 12:16:15,467][00394] Num frames 3100... +[2023-02-27 12:16:15,589][00394] Num frames 3200... +[2023-02-27 12:16:15,717][00394] Num frames 3300... +[2023-02-27 12:16:15,845][00394] Num frames 3400... +[2023-02-27 12:16:15,974][00394] Num frames 3500... +[2023-02-27 12:16:16,120][00394] Num frames 3600... +[2023-02-27 12:16:16,244][00394] Num frames 3700... +[2023-02-27 12:16:16,364][00394] Num frames 3800... +[2023-02-27 12:16:16,495][00394] Num frames 3900... +[2023-02-27 12:16:16,617][00394] Num frames 4000... +[2023-02-27 12:16:16,737][00394] Num frames 4100... +[2023-02-27 12:16:16,871][00394] Num frames 4200... +[2023-02-27 12:16:16,993][00394] Num frames 4300... +[2023-02-27 12:16:17,090][00394] Avg episode rewards: #0: 33.443, true rewards: #0: 14.443 +[2023-02-27 12:16:17,093][00394] Avg episode reward: 33.443, avg true_objective: 14.443 +[2023-02-27 12:16:17,175][00394] Num frames 4400... +[2023-02-27 12:16:17,298][00394] Num frames 4500... +[2023-02-27 12:16:17,429][00394] Num frames 4600... +[2023-02-27 12:16:17,557][00394] Num frames 4700... +[2023-02-27 12:16:17,677][00394] Num frames 4800... +[2023-02-27 12:16:17,804][00394] Num frames 4900... +[2023-02-27 12:16:17,926][00394] Num frames 5000... +[2023-02-27 12:16:18,051][00394] Num frames 5100... +[2023-02-27 12:16:18,174][00394] Num frames 5200... +[2023-02-27 12:16:18,262][00394] Avg episode rewards: #0: 30.297, true rewards: #0: 13.047 +[2023-02-27 12:16:18,264][00394] Avg episode reward: 30.297, avg true_objective: 13.047 +[2023-02-27 12:16:18,368][00394] Num frames 5300... +[2023-02-27 12:16:18,503][00394] Num frames 5400... +[2023-02-27 12:16:18,623][00394] Num frames 5500... +[2023-02-27 12:16:18,748][00394] Num frames 5600... +[2023-02-27 12:16:18,866][00394] Num frames 5700... +[2023-02-27 12:16:18,992][00394] Num frames 5800... +[2023-02-27 12:16:19,109][00394] Avg episode rewards: #0: 26.500, true rewards: #0: 11.700 +[2023-02-27 12:16:19,111][00394] Avg episode reward: 26.500, avg true_objective: 11.700 +[2023-02-27 12:16:19,174][00394] Num frames 5900... +[2023-02-27 12:16:19,299][00394] Num frames 6000... +[2023-02-27 12:16:19,421][00394] Num frames 6100... +[2023-02-27 12:16:19,552][00394] Num frames 6200... +[2023-02-27 12:16:19,675][00394] Num frames 6300... +[2023-02-27 12:16:19,797][00394] Num frames 6400... +[2023-02-27 12:16:19,921][00394] Num frames 6500... +[2023-02-27 12:16:20,041][00394] Num frames 6600... +[2023-02-27 12:16:20,166][00394] Num frames 6700... +[2023-02-27 12:16:20,288][00394] Num frames 6800... +[2023-02-27 12:16:20,413][00394] Num frames 6900... +[2023-02-27 12:16:20,561][00394] Avg episode rewards: #0: 26.117, true rewards: #0: 11.617 +[2023-02-27 12:16:20,563][00394] Avg episode reward: 26.117, avg true_objective: 11.617 +[2023-02-27 12:16:20,605][00394] Num frames 7000... +[2023-02-27 12:16:20,725][00394] Num frames 7100... +[2023-02-27 12:16:20,854][00394] Num frames 7200... +[2023-02-27 12:16:20,974][00394] Num frames 7300... +[2023-02-27 12:16:21,101][00394] Num frames 7400... +[2023-02-27 12:16:21,257][00394] Avg episode rewards: #0: 24.117, true rewards: #0: 10.689 +[2023-02-27 12:16:21,259][00394] Avg episode reward: 24.117, avg true_objective: 10.689 +[2023-02-27 12:16:21,285][00394] Num frames 7500... +[2023-02-27 12:16:21,436][00394] Num frames 7600... +[2023-02-27 12:16:21,570][00394] Num frames 7700... +[2023-02-27 12:16:21,691][00394] Num frames 7800... +[2023-02-27 12:16:21,818][00394] Num frames 7900... +[2023-02-27 12:16:21,937][00394] Num frames 8000... +[2023-02-27 12:16:22,067][00394] Num frames 8100... +[2023-02-27 12:16:22,190][00394] Num frames 8200... +[2023-02-27 12:16:22,318][00394] Num frames 8300... +[2023-02-27 12:16:22,445][00394] Num frames 8400... +[2023-02-27 12:16:22,557][00394] Avg episode rewards: #0: 23.303, true rewards: #0: 10.552 +[2023-02-27 12:16:22,559][00394] Avg episode reward: 23.303, avg true_objective: 10.552 +[2023-02-27 12:16:22,633][00394] Num frames 8500... +[2023-02-27 12:16:22,760][00394] Num frames 8600... +[2023-02-27 12:16:22,880][00394] Num frames 8700... +[2023-02-27 12:16:23,007][00394] Num frames 8800... +[2023-02-27 12:16:23,133][00394] Num frames 8900... +[2023-02-27 12:16:23,260][00394] Num frames 9000... +[2023-02-27 12:16:23,385][00394] Num frames 9100... +[2023-02-27 12:16:23,505][00394] Num frames 9200... +[2023-02-27 12:16:23,637][00394] Num frames 9300... +[2023-02-27 12:16:23,758][00394] Num frames 9400... +[2023-02-27 12:16:23,893][00394] Num frames 9500... +[2023-02-27 12:16:23,964][00394] Avg episode rewards: #0: 23.233, true rewards: #0: 10.567 +[2023-02-27 12:16:23,966][00394] Avg episode reward: 23.233, avg true_objective: 10.567 +[2023-02-27 12:16:24,080][00394] Num frames 9600... +[2023-02-27 12:16:24,212][00394] Num frames 9700... +[2023-02-27 12:16:24,336][00394] Num frames 9800... +[2023-02-27 12:16:24,464][00394] Num frames 9900... +[2023-02-27 12:16:24,592][00394] Num frames 10000... +[2023-02-27 12:16:24,714][00394] Num frames 10100... +[2023-02-27 12:16:24,884][00394] Num frames 10200... +[2023-02-27 12:16:25,085][00394] Num frames 10300... +[2023-02-27 12:16:25,254][00394] Num frames 10400... +[2023-02-27 12:16:25,460][00394] Num frames 10500... +[2023-02-27 12:16:25,633][00394] Num frames 10600... +[2023-02-27 12:16:25,805][00394] Num frames 10700... +[2023-02-27 12:16:25,972][00394] Num frames 10800... +[2023-02-27 12:16:26,184][00394] Avg episode rewards: #0: 23.986, true rewards: #0: 10.886 +[2023-02-27 12:16:26,187][00394] Avg episode reward: 23.986, avg true_objective: 10.886 +[2023-02-27 12:17:36,340][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-27 12:17:36,956][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 12:17:36,958][00394] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-27 12:17:36,960][00394] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-27 12:17:36,962][00394] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-27 12:17:36,964][00394] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:17:36,966][00394] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-27 12:17:36,968][00394] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-27 12:17:36,969][00394] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-27 12:17:36,970][00394] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-27 12:17:36,971][00394] Adding new argument 'hf_repository'='Clawoo/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-27 12:17:36,972][00394] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-27 12:17:36,973][00394] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-27 12:17:36,974][00394] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-27 12:17:36,976][00394] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-27 12:17:36,979][00394] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-27 12:17:36,999][00394] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:17:37,002][00394] RunningMeanStd input shape: (1,) +[2023-02-27 12:17:37,026][00394] ConvEncoder: input_channels=3 +[2023-02-27 12:17:37,095][00394] Conv encoder output size: 512 +[2023-02-27 12:17:37,097][00394] Policy head output size: 512 +[2023-02-27 12:17:37,125][00394] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth... +[2023-02-27 12:17:37,974][00394] Num frames 100... +[2023-02-27 12:17:38,186][00394] Num frames 200... +[2023-02-27 12:17:38,421][00394] Num frames 300... +[2023-02-27 12:17:38,603][00394] Num frames 400... +[2023-02-27 12:17:38,816][00394] Num frames 500... +[2023-02-27 12:17:38,990][00394] Num frames 600... +[2023-02-27 12:17:39,174][00394] Num frames 700... +[2023-02-27 12:17:39,359][00394] Num frames 800... +[2023-02-27 12:17:39,534][00394] Num frames 900... +[2023-02-27 12:17:39,699][00394] Num frames 1000... +[2023-02-27 12:17:39,876][00394] Num frames 1100... +[2023-02-27 12:17:40,061][00394] Num frames 1200... +[2023-02-27 12:17:40,244][00394] Num frames 1300... +[2023-02-27 12:17:40,416][00394] Num frames 1400... +[2023-02-27 12:17:40,581][00394] Num frames 1500... +[2023-02-27 12:17:40,748][00394] Num frames 1600... +[2023-02-27 12:17:40,948][00394] Num frames 1700... +[2023-02-27 12:17:41,161][00394] Avg episode rewards: #0: 42.749, true rewards: #0: 17.750 +[2023-02-27 12:17:41,163][00394] Avg episode reward: 42.749, avg true_objective: 17.750 +[2023-02-27 12:17:41,219][00394] Num frames 1800... +[2023-02-27 12:17:41,426][00394] Num frames 1900... +[2023-02-27 12:17:41,647][00394] Num frames 2000... +[2023-02-27 12:17:41,853][00394] Num frames 2100... +[2023-02-27 12:17:42,064][00394] Num frames 2200... +[2023-02-27 12:17:42,291][00394] Num frames 2300... +[2023-02-27 12:17:42,514][00394] Num frames 2400... +[2023-02-27 12:17:42,714][00394] Num frames 2500... +[2023-02-27 12:17:42,930][00394] Num frames 2600... +[2023-02-27 12:17:43,146][00394] Num frames 2700... +[2023-02-27 12:17:43,329][00394] Num frames 2800... +[2023-02-27 12:17:43,507][00394] Num frames 2900... +[2023-02-27 12:17:43,695][00394] Num frames 3000... +[2023-02-27 12:17:43,792][00394] Avg episode rewards: #0: 33.115, true rewards: #0: 15.115 +[2023-02-27 12:17:43,794][00394] Avg episode reward: 33.115, avg true_objective: 15.115 +[2023-02-27 12:17:43,971][00394] Num frames 3100... +[2023-02-27 12:17:44,176][00394] Num frames 3200... +[2023-02-27 12:17:44,379][00394] Num frames 3300... +[2023-02-27 12:17:44,558][00394] Num frames 3400... +[2023-02-27 12:17:44,731][00394] Num frames 3500... +[2023-02-27 12:17:44,896][00394] Num frames 3600... +[2023-02-27 12:17:45,071][00394] Num frames 3700... +[2023-02-27 12:17:45,244][00394] Num frames 3800... +[2023-02-27 12:17:45,409][00394] Num frames 3900... +[2023-02-27 12:17:45,493][00394] Avg episode rewards: #0: 28.383, true rewards: #0: 13.050 +[2023-02-27 12:17:45,495][00394] Avg episode reward: 28.383, avg true_objective: 13.050 +[2023-02-27 12:17:45,632][00394] Num frames 4000... +[2023-02-27 12:17:45,801][00394] Num frames 4100... +[2023-02-27 12:17:45,922][00394] Num frames 4200... +[2023-02-27 12:17:46,040][00394] Num frames 4300... +[2023-02-27 12:17:46,147][00394] Avg episode rewards: #0: 23.602, true rewards: #0: 10.852 +[2023-02-27 12:17:46,149][00394] Avg episode reward: 23.602, avg true_objective: 10.852 +[2023-02-27 12:17:46,221][00394] Num frames 4400... +[2023-02-27 12:17:46,339][00394] Num frames 4500... +[2023-02-27 12:17:46,466][00394] Num frames 4600... +[2023-02-27 12:17:46,590][00394] Num frames 4700... +[2023-02-27 12:17:46,709][00394] Num frames 4800... +[2023-02-27 12:17:46,828][00394] Avg episode rewards: #0: 20.906, true rewards: #0: 9.706 +[2023-02-27 12:17:46,830][00394] Avg episode reward: 20.906, avg true_objective: 9.706 +[2023-02-27 12:17:46,889][00394] Num frames 4900... +[2023-02-27 12:17:47,007][00394] Num frames 5000... +[2023-02-27 12:17:47,116][00394] Avg episode rewards: #0: 17.908, true rewards: #0: 8.408 +[2023-02-27 12:17:47,119][00394] Avg episode reward: 17.908, avg true_objective: 8.408 +[2023-02-27 12:17:47,194][00394] Num frames 5100... +[2023-02-27 12:17:47,317][00394] Num frames 5200... +[2023-02-27 12:17:47,441][00394] Num frames 5300... +[2023-02-27 12:17:47,572][00394] Num frames 5400... +[2023-02-27 12:17:47,697][00394] Num frames 5500... +[2023-02-27 12:17:47,817][00394] Num frames 5600... +[2023-02-27 12:17:47,939][00394] Num frames 5700... +[2023-02-27 12:17:48,058][00394] Num frames 5800... +[2023-02-27 12:17:48,191][00394] Num frames 5900... +[2023-02-27 12:17:48,313][00394] Num frames 6000... +[2023-02-27 12:17:48,434][00394] Num frames 6100... +[2023-02-27 12:17:48,554][00394] Num frames 6200... +[2023-02-27 12:17:48,677][00394] Num frames 6300... +[2023-02-27 12:17:48,799][00394] Num frames 6400... +[2023-02-27 12:17:48,920][00394] Num frames 6500... +[2023-02-27 12:17:49,093][00394] Avg episode rewards: #0: 21.136, true rewards: #0: 9.421 +[2023-02-27 12:17:49,094][00394] Avg episode reward: 21.136, avg true_objective: 9.421 +[2023-02-27 12:17:49,108][00394] Num frames 6600... +[2023-02-27 12:17:49,235][00394] Num frames 6700... +[2023-02-27 12:17:49,358][00394] Num frames 6800... +[2023-02-27 12:17:49,483][00394] Num frames 6900... +[2023-02-27 12:17:49,607][00394] Num frames 7000... +[2023-02-27 12:17:49,715][00394] Avg episode rewards: #0: 19.554, true rewards: #0: 8.804 +[2023-02-27 12:17:49,717][00394] Avg episode reward: 19.554, avg true_objective: 8.804 +[2023-02-27 12:17:49,791][00394] Num frames 7100... +[2023-02-27 12:17:49,921][00394] Num frames 7200... +[2023-02-27 12:17:50,044][00394] Num frames 7300... +[2023-02-27 12:17:50,166][00394] Num frames 7400... +[2023-02-27 12:17:50,293][00394] Num frames 7500... +[2023-02-27 12:17:50,409][00394] Num frames 7600... +[2023-02-27 12:17:50,530][00394] Num frames 7700... +[2023-02-27 12:17:50,652][00394] Num frames 7800... +[2023-02-27 12:17:50,773][00394] Num frames 7900... +[2023-02-27 12:17:50,893][00394] Num frames 8000... +[2023-02-27 12:17:51,011][00394] Num frames 8100... +[2023-02-27 12:17:51,156][00394] Avg episode rewards: #0: 20.194, true rewards: #0: 9.083 +[2023-02-27 12:17:51,158][00394] Avg episode reward: 20.194, avg true_objective: 9.083 +[2023-02-27 12:17:51,191][00394] Num frames 8200... +[2023-02-27 12:17:51,315][00394] Num frames 8300... +[2023-02-27 12:17:51,432][00394] Num frames 8400... +[2023-02-27 12:17:51,549][00394] Num frames 8500... +[2023-02-27 12:17:51,686][00394] Num frames 8600... +[2023-02-27 12:17:51,806][00394] Avg episode rewards: #0: 18.855, true rewards: #0: 8.655 +[2023-02-27 12:17:51,808][00394] Avg episode reward: 18.855, avg true_objective: 8.655 +[2023-02-27 12:18:46,740][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4!