diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -4467,3 +4467,1303 @@ main_loop: 1567.2632 [2023-02-27 12:17:51,806][00394] Avg episode rewards: #0: 18.855, true rewards: #0: 8.655 [2023-02-27 12:17:51,808][00394] Avg episode reward: 18.855, avg true_objective: 8.655 [2023-02-27 12:18:46,740][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-27 12:18:50,489][00394] The model has been pushed to https://huggingface.co/Clawoo/rl_course_vizdoom_health_gathering_supreme +[2023-02-27 12:22:07,509][00394] Environment doom_basic already registered, overwriting... +[2023-02-27 12:22:07,512][00394] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-27 12:22:07,514][00394] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-27 12:22:07,515][00394] Environment doom_dm already registered, overwriting... +[2023-02-27 12:22:07,517][00394] Environment doom_dwango5 already registered, overwriting... +[2023-02-27 12:22:07,519][00394] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-27 12:22:07,521][00394] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-27 12:22:07,528][00394] Environment doom_my_way_home already registered, overwriting... +[2023-02-27 12:22:07,532][00394] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-27 12:22:07,533][00394] Environment doom_defend_the_center already registered, overwriting... +[2023-02-27 12:22:07,535][00394] Environment doom_defend_the_line already registered, overwriting... +[2023-02-27 12:22:07,536][00394] Environment doom_health_gathering already registered, overwriting... +[2023-02-27 12:22:07,539][00394] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-27 12:22:07,541][00394] Environment doom_battle already registered, overwriting... +[2023-02-27 12:22:07,543][00394] Environment doom_battle2 already registered, overwriting... +[2023-02-27 12:22:07,545][00394] Environment doom_duel_bots already registered, overwriting... +[2023-02-27 12:22:07,547][00394] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-27 12:22:07,548][00394] Environment doom_duel already registered, overwriting... +[2023-02-27 12:22:07,550][00394] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-27 12:22:07,552][00394] Environment doom_benchmark already registered, overwriting... +[2023-02-27 12:22:07,559][00394] register_encoder_factory: +[2023-02-27 12:22:07,584][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 12:22:07,587][00394] Overriding arg 'train_for_env_steps' with value 16000000 passed from command line +[2023-02-27 12:22:07,592][00394] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-27 12:22:07,594][00394] Resuming existing experiment from /content/train_dir/default_experiment... +[2023-02-27 12:22:07,596][00394] Weights and Biases integration disabled +[2023-02-27 12:22:07,603][00394] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2023-02-27 12:22:11,230][00394] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=16000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2023-02-27 12:22:11,234][00394] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-27 12:22:11,236][00394] Rollout worker 0 uses device cpu +[2023-02-27 12:22:11,239][00394] Rollout worker 1 uses device cpu +[2023-02-27 12:22:11,240][00394] Rollout worker 2 uses device cpu +[2023-02-27 12:22:11,242][00394] Rollout worker 3 uses device cpu +[2023-02-27 12:22:11,243][00394] Rollout worker 4 uses device cpu +[2023-02-27 12:22:11,244][00394] Rollout worker 5 uses device cpu +[2023-02-27 12:22:11,246][00394] Rollout worker 6 uses device cpu +[2023-02-27 12:22:11,247][00394] Rollout worker 7 uses device cpu +[2023-02-27 12:22:11,409][00394] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 12:22:11,414][00394] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-27 12:22:11,463][00394] Starting all processes... +[2023-02-27 12:22:11,465][00394] Starting process learner_proc0 +[2023-02-27 12:22:11,685][00394] Starting all processes... +[2023-02-27 12:22:11,698][00394] Starting process inference_proc0-0 +[2023-02-27 12:22:11,698][00394] Starting process rollout_proc0 +[2023-02-27 12:22:11,702][00394] Starting process rollout_proc1 +[2023-02-27 12:22:11,706][00394] Starting process rollout_proc2 +[2023-02-27 12:22:11,706][00394] Starting process rollout_proc3 +[2023-02-27 12:22:11,706][00394] Starting process rollout_proc4 +[2023-02-27 12:22:11,706][00394] Starting process rollout_proc5 +[2023-02-27 12:22:11,706][00394] Starting process rollout_proc6 +[2023-02-27 12:22:11,860][00394] Starting process rollout_proc7 +[2023-02-27 12:22:21,684][47447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 12:22:21,684][47447] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-27 12:22:21,750][47447] Num visible devices: 1 +[2023-02-27 12:22:21,788][47447] Starting seed is not provided +[2023-02-27 12:22:21,788][47447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 12:22:21,789][47447] Initializing actor-critic model on device cuda:0 +[2023-02-27 12:22:21,789][47447] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:22:21,795][47447] RunningMeanStd input shape: (1,) +[2023-02-27 12:22:21,911][47447] ConvEncoder: input_channels=3 +[2023-02-27 12:22:22,815][47465] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 12:22:22,817][47465] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-27 12:22:22,914][47466] Worker 1 uses CPU cores [1] +[2023-02-27 12:22:22,932][47465] Num visible devices: 1 +[2023-02-27 12:22:23,107][47447] Conv encoder output size: 512 +[2023-02-27 12:22:23,111][47447] Policy head output size: 512 +[2023-02-27 12:22:23,198][47447] Created Actor Critic model with architecture: +[2023-02-27 12:22:23,200][47447] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-27 12:22:23,562][47470] Worker 0 uses CPU cores [0] +[2023-02-27 12:22:23,767][47467] Worker 2 uses CPU cores [0] +[2023-02-27 12:22:24,208][47476] Worker 3 uses CPU cores [1] +[2023-02-27 12:22:24,305][47494] Worker 6 uses CPU cores [0] +[2023-02-27 12:22:24,454][47480] Worker 4 uses CPU cores [0] +[2023-02-27 12:22:24,550][47486] Worker 7 uses CPU cores [1] +[2023-02-27 12:22:24,658][47488] Worker 5 uses CPU cores [1] +[2023-02-27 12:22:31,358][47447] Using optimizer +[2023-02-27 12:22:31,359][47447] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth... +[2023-02-27 12:22:31,397][47447] Loading model from checkpoint +[2023-02-27 12:22:31,399][00394] Heartbeat connected on Batcher_0 +[2023-02-27 12:22:31,407][47447] Loaded experiment state at self.train_step=2931, self.env_steps=12005376 +[2023-02-27 12:22:31,408][47447] Initialized policy 0 weights for model version 2931 +[2023-02-27 12:22:31,409][00394] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-27 12:22:31,416][47447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-27 12:22:31,423][47447] LearnerWorker_p0 finished initialization! +[2023-02-27 12:22:31,426][00394] Heartbeat connected on LearnerWorker_p0 +[2023-02-27 12:22:31,432][00394] Heartbeat connected on RolloutWorker_w0 +[2023-02-27 12:22:31,441][00394] Heartbeat connected on RolloutWorker_w2 +[2023-02-27 12:22:31,449][00394] Heartbeat connected on RolloutWorker_w1 +[2023-02-27 12:22:31,459][00394] Heartbeat connected on RolloutWorker_w4 +[2023-02-27 12:22:31,470][00394] Heartbeat connected on RolloutWorker_w3 +[2023-02-27 12:22:31,480][00394] Heartbeat connected on RolloutWorker_w6 +[2023-02-27 12:22:31,545][00394] Heartbeat connected on RolloutWorker_w5 +[2023-02-27 12:22:31,550][00394] Heartbeat connected on RolloutWorker_w7 +[2023-02-27 12:22:31,624][47465] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:22:31,626][47465] RunningMeanStd input shape: (1,) +[2023-02-27 12:22:31,643][47465] ConvEncoder: input_channels=3 +[2023-02-27 12:22:31,743][47465] Conv encoder output size: 512 +[2023-02-27 12:22:31,744][47465] Policy head output size: 512 +[2023-02-27 12:22:32,603][00394] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 12005376. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 12:22:34,158][00394] Inference worker 0-0 is ready! +[2023-02-27 12:22:34,161][00394] All inference workers are ready! Signal rollout workers to start! +[2023-02-27 12:22:34,292][47466] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,301][47488] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,298][47486] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,311][47476] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,338][47470] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,333][47480] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,336][47467] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,339][47494] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-27 12:22:34,976][47480] Decorrelating experience for 0 frames... +[2023-02-27 12:22:35,639][47466] Decorrelating experience for 0 frames... +[2023-02-27 12:22:35,641][47486] Decorrelating experience for 0 frames... +[2023-02-27 12:22:35,643][47476] Decorrelating experience for 0 frames... +[2023-02-27 12:22:35,646][47488] Decorrelating experience for 0 frames... +[2023-02-27 12:22:36,025][47494] Decorrelating experience for 0 frames... +[2023-02-27 12:22:36,761][47494] Decorrelating experience for 32 frames... +[2023-02-27 12:22:36,805][47480] Decorrelating experience for 32 frames... +[2023-02-27 12:22:37,061][47488] Decorrelating experience for 32 frames... +[2023-02-27 12:22:37,063][47466] Decorrelating experience for 32 frames... +[2023-02-27 12:22:37,065][47486] Decorrelating experience for 32 frames... +[2023-02-27 12:22:37,069][47476] Decorrelating experience for 32 frames... +[2023-02-27 12:22:37,603][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12005376. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 12:22:37,735][47486] Decorrelating experience for 64 frames... +[2023-02-27 12:22:37,856][47470] Decorrelating experience for 0 frames... +[2023-02-27 12:22:38,211][47480] Decorrelating experience for 64 frames... +[2023-02-27 12:22:38,226][47467] Decorrelating experience for 0 frames... +[2023-02-27 12:22:38,496][47476] Decorrelating experience for 64 frames... +[2023-02-27 12:22:38,731][47488] Decorrelating experience for 64 frames... +[2023-02-27 12:22:38,930][47470] Decorrelating experience for 32 frames... +[2023-02-27 12:22:39,256][47466] Decorrelating experience for 64 frames... +[2023-02-27 12:22:39,395][47494] Decorrelating experience for 64 frames... +[2023-02-27 12:22:39,780][47488] Decorrelating experience for 96 frames... +[2023-02-27 12:22:39,995][47480] Decorrelating experience for 96 frames... +[2023-02-27 12:22:40,201][47476] Decorrelating experience for 96 frames... +[2023-02-27 12:22:40,600][47467] Decorrelating experience for 32 frames... +[2023-02-27 12:22:41,058][47466] Decorrelating experience for 96 frames... +[2023-02-27 12:22:41,352][47486] Decorrelating experience for 96 frames... +[2023-02-27 12:22:41,900][47470] Decorrelating experience for 64 frames... +[2023-02-27 12:22:42,077][47494] Decorrelating experience for 96 frames... +[2023-02-27 12:22:42,606][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12005376. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 12:22:42,896][47467] Decorrelating experience for 64 frames... +[2023-02-27 12:22:43,392][47470] Decorrelating experience for 96 frames... +[2023-02-27 12:22:43,868][47467] Decorrelating experience for 96 frames... +[2023-02-27 12:22:47,604][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12005376. Throughput: 0: 67.3. Samples: 1010. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 12:22:47,612][00394] Avg episode reward: [(0, '2.160')] +[2023-02-27 12:22:49,596][47447] Signal inference workers to stop experience collection... +[2023-02-27 12:22:49,633][47465] InferenceWorker_p0-w0: stopping experience collection +[2023-02-27 12:22:52,608][00394] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12005376. Throughput: 0: 128.9. Samples: 2578. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-27 12:22:52,655][00394] Avg episode reward: [(0, '4.700')] +[2023-02-27 12:22:52,871][47447] Signal inference workers to resume experience collection... +[2023-02-27 12:22:52,873][47465] InferenceWorker_p0-w0: resuming experience collection +[2023-02-27 12:22:57,603][00394] Fps is (10 sec: 2048.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 12025856. Throughput: 0: 154.1. Samples: 3852. Policy #0 lag: (min: 0.0, avg: 0.6, max: 3.0) +[2023-02-27 12:22:57,606][00394] Avg episode reward: [(0, '8.121')] +[2023-02-27 12:23:02,606][00394] Fps is (10 sec: 3685.3, 60 sec: 1228.7, 300 sec: 1228.7). Total num frames: 12042240. Throughput: 0: 318.6. Samples: 9560. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:23:02,616][00394] Avg episode reward: [(0, '12.387')] +[2023-02-27 12:23:02,884][47465] Updated weights for policy 0, policy_version 2941 (0.0022) +[2023-02-27 12:23:07,608][00394] Fps is (10 sec: 3275.3, 60 sec: 1521.2, 300 sec: 1521.2). Total num frames: 12058624. Throughput: 0: 388.8. Samples: 13610. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:23:07,611][00394] Avg episode reward: [(0, '13.765')] +[2023-02-27 12:23:12,603][00394] Fps is (10 sec: 2868.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 12070912. Throughput: 0: 392.0. Samples: 15682. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-27 12:23:12,606][00394] Avg episode reward: [(0, '17.453')] +[2023-02-27 12:23:15,603][47465] Updated weights for policy 0, policy_version 2951 (0.0012) +[2023-02-27 12:23:17,603][00394] Fps is (10 sec: 3688.1, 60 sec: 2002.5, 300 sec: 2002.5). Total num frames: 12095488. Throughput: 0: 483.4. Samples: 21754. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:23:17,610][00394] Avg episode reward: [(0, '18.932')] +[2023-02-27 12:23:22,603][00394] Fps is (10 sec: 4096.0, 60 sec: 2129.9, 300 sec: 2129.9). Total num frames: 12111872. Throughput: 0: 616.2. Samples: 27728. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:23:22,610][00394] Avg episode reward: [(0, '19.905')] +[2023-02-27 12:23:27,132][47465] Updated weights for policy 0, policy_version 2961 (0.0013) +[2023-02-27 12:23:27,603][00394] Fps is (10 sec: 3276.8, 60 sec: 2234.2, 300 sec: 2234.2). Total num frames: 12128256. Throughput: 0: 661.9. Samples: 29782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:23:27,612][00394] Avg episode reward: [(0, '22.022')] +[2023-02-27 12:23:32,603][00394] Fps is (10 sec: 2867.2, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 12140544. Throughput: 0: 727.1. Samples: 33728. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-27 12:23:32,609][00394] Avg episode reward: [(0, '24.064')] +[2023-02-27 12:23:37,603][00394] Fps is (10 sec: 3276.8, 60 sec: 2594.1, 300 sec: 2394.6). Total num frames: 12161024. Throughput: 0: 828.9. Samples: 39880. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:23:37,611][00394] Avg episode reward: [(0, '25.580')] +[2023-02-27 12:23:37,653][47447] Saving new best policy, reward=25.580! +[2023-02-27 12:23:38,651][47465] Updated weights for policy 0, policy_version 2971 (0.0014) +[2023-02-27 12:23:42,607][00394] Fps is (10 sec: 4094.3, 60 sec: 2935.4, 300 sec: 2516.0). Total num frames: 12181504. Throughput: 0: 870.5. Samples: 43028. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:23:42,610][00394] Avg episode reward: [(0, '24.474')] +[2023-02-27 12:23:47,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3208.6, 300 sec: 2566.8). Total num frames: 12197888. Throughput: 0: 849.9. Samples: 47802. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-27 12:23:47,606][00394] Avg episode reward: [(0, '23.995')] +[2023-02-27 12:23:51,890][47465] Updated weights for policy 0, policy_version 2981 (0.0029) +[2023-02-27 12:23:52,604][00394] Fps is (10 sec: 2868.1, 60 sec: 3413.3, 300 sec: 2560.0). Total num frames: 12210176. Throughput: 0: 851.7. Samples: 51932. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:23:52,614][00394] Avg episode reward: [(0, '23.079')] +[2023-02-27 12:23:57,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 2650.4). Total num frames: 12230656. Throughput: 0: 870.2. Samples: 54840. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:23:57,605][00394] Avg episode reward: [(0, '22.356')] +[2023-02-27 12:24:01,698][47465] Updated weights for policy 0, policy_version 2991 (0.0014) +[2023-02-27 12:24:02,603][00394] Fps is (10 sec: 4096.4, 60 sec: 3481.8, 300 sec: 2730.7). Total num frames: 12251136. Throughput: 0: 881.3. Samples: 61412. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:24:02,605][00394] Avg episode reward: [(0, '20.398')] +[2023-02-27 12:24:07,609][00394] Fps is (10 sec: 3684.1, 60 sec: 3481.5, 300 sec: 2759.2). Total num frames: 12267520. Throughput: 0: 853.7. Samples: 66150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:24:07,619][00394] Avg episode reward: [(0, '20.733')] +[2023-02-27 12:24:07,633][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002995_12267520.pth... +[2023-02-27 12:24:07,862][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002925_11980800.pth +[2023-02-27 12:24:12,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 2744.3). Total num frames: 12279808. Throughput: 0: 852.8. Samples: 68158. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:12,606][00394] Avg episode reward: [(0, '20.286')] +[2023-02-27 12:24:15,352][47465] Updated weights for policy 0, policy_version 3001 (0.0014) +[2023-02-27 12:24:17,603][00394] Fps is (10 sec: 3278.7, 60 sec: 3413.3, 300 sec: 2808.7). Total num frames: 12300288. Throughput: 0: 876.1. Samples: 73154. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:24:17,606][00394] Avg episode reward: [(0, '20.217')] +[2023-02-27 12:24:22,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 2867.2). Total num frames: 12320768. Throughput: 0: 885.6. Samples: 79732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:22,606][00394] Avg episode reward: [(0, '21.842')] +[2023-02-27 12:24:25,218][47465] Updated weights for policy 0, policy_version 3011 (0.0015) +[2023-02-27 12:24:27,603][00394] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 2885.0). Total num frames: 12337152. Throughput: 0: 876.8. Samples: 82480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:27,607][00394] Avg episode reward: [(0, '22.466')] +[2023-02-27 12:24:32,604][00394] Fps is (10 sec: 3276.6, 60 sec: 3549.8, 300 sec: 2901.3). Total num frames: 12353536. Throughput: 0: 860.8. Samples: 86538. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:24:32,608][00394] Avg episode reward: [(0, '23.469')] +[2023-02-27 12:24:37,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 2916.4). Total num frames: 12369920. Throughput: 0: 882.9. Samples: 91662. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:24:37,609][00394] Avg episode reward: [(0, '22.642')] +[2023-02-27 12:24:38,131][47465] Updated weights for policy 0, policy_version 3021 (0.0012) +[2023-02-27 12:24:42,603][00394] Fps is (10 sec: 3686.7, 60 sec: 3481.8, 300 sec: 2961.7). Total num frames: 12390400. Throughput: 0: 891.2. Samples: 94944. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:24:42,609][00394] Avg episode reward: [(0, '23.363')] +[2023-02-27 12:24:47,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 2973.4). Total num frames: 12406784. Throughput: 0: 874.7. Samples: 100772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:47,610][00394] Avg episode reward: [(0, '23.392')] +[2023-02-27 12:24:49,609][47465] Updated weights for policy 0, policy_version 3031 (0.0022) +[2023-02-27 12:24:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 2984.2). Total num frames: 12423168. Throughput: 0: 860.2. Samples: 104854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:52,610][00394] Avg episode reward: [(0, '22.012')] +[2023-02-27 12:24:57,603][00394] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 2994.3). Total num frames: 12439552. Throughput: 0: 863.5. Samples: 107016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:24:57,606][00394] Avg episode reward: [(0, '21.782')] +[2023-02-27 12:25:00,789][47465] Updated weights for policy 0, policy_version 3041 (0.0019) +[2023-02-27 12:25:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3031.0). Total num frames: 12460032. Throughput: 0: 899.9. Samples: 113650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:25:02,611][00394] Avg episode reward: [(0, '22.628')] +[2023-02-27 12:25:07,603][00394] Fps is (10 sec: 4096.1, 60 sec: 3550.2, 300 sec: 3065.4). Total num frames: 12480512. Throughput: 0: 877.6. Samples: 119222. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:25:07,611][00394] Avg episode reward: [(0, '22.852')] +[2023-02-27 12:25:12,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3046.4). Total num frames: 12492800. Throughput: 0: 863.2. Samples: 121322. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:25:12,607][00394] Avg episode reward: [(0, '22.165')] +[2023-02-27 12:25:13,414][47465] Updated weights for policy 0, policy_version 3051 (0.0024) +[2023-02-27 12:25:17,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3053.4). Total num frames: 12509184. Throughput: 0: 867.8. Samples: 125588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:25:17,609][00394] Avg episode reward: [(0, '22.673')] +[2023-02-27 12:25:22,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3108.1). Total num frames: 12533760. Throughput: 0: 902.4. Samples: 132268. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:25:22,609][00394] Avg episode reward: [(0, '22.113')] +[2023-02-27 12:25:23,308][47465] Updated weights for policy 0, policy_version 3061 (0.0013) +[2023-02-27 12:25:27,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3113.0). Total num frames: 12550144. Throughput: 0: 903.4. Samples: 135596. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-27 12:25:27,611][00394] Avg episode reward: [(0, '22.524')] +[2023-02-27 12:25:32,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3117.5). Total num frames: 12566528. Throughput: 0: 867.4. Samples: 139804. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:25:32,608][00394] Avg episode reward: [(0, '23.094')] +[2023-02-27 12:25:36,473][47465] Updated weights for policy 0, policy_version 3071 (0.0019) +[2023-02-27 12:25:37,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3121.8). Total num frames: 12582912. Throughput: 0: 883.2. Samples: 144598. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:25:37,606][00394] Avg episode reward: [(0, '23.718')] +[2023-02-27 12:25:42,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3147.5). Total num frames: 12603392. Throughput: 0: 908.2. Samples: 147884. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:25:42,609][00394] Avg episode reward: [(0, '23.177')] +[2023-02-27 12:25:45,898][47465] Updated weights for policy 0, policy_version 3081 (0.0018) +[2023-02-27 12:25:47,609][00394] Fps is (10 sec: 4093.5, 60 sec: 3617.8, 300 sec: 3171.7). Total num frames: 12623872. Throughput: 0: 904.0. Samples: 154336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:25:47,616][00394] Avg episode reward: [(0, '23.834')] +[2023-02-27 12:25:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3153.9). Total num frames: 12636160. Throughput: 0: 870.2. Samples: 158380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:25:52,607][00394] Avg episode reward: [(0, '25.568')] +[2023-02-27 12:25:57,603][00394] Fps is (10 sec: 2868.9, 60 sec: 3549.9, 300 sec: 3156.9). Total num frames: 12652544. Throughput: 0: 868.6. Samples: 160410. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:25:57,609][00394] Avg episode reward: [(0, '23.959')] +[2023-02-27 12:25:59,128][47465] Updated weights for policy 0, policy_version 3091 (0.0019) +[2023-02-27 12:26:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3179.3). Total num frames: 12673024. Throughput: 0: 915.1. Samples: 166768. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:26:02,608][00394] Avg episode reward: [(0, '23.388')] +[2023-02-27 12:26:07,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3200.6). Total num frames: 12693504. Throughput: 0: 899.4. Samples: 172742. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:26:07,607][00394] Avg episode reward: [(0, '23.461')] +[2023-02-27 12:26:07,623][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003099_12693504.pth... +[2023-02-27 12:26:07,876][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002931_12005376.pth +[2023-02-27 12:26:10,022][47465] Updated weights for policy 0, policy_version 3101 (0.0012) +[2023-02-27 12:26:12,607][00394] Fps is (10 sec: 3275.5, 60 sec: 3549.6, 300 sec: 3183.7). Total num frames: 12705792. Throughput: 0: 869.5. Samples: 174726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:12,610][00394] Avg episode reward: [(0, '23.324')] +[2023-02-27 12:26:17,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3185.8). Total num frames: 12722176. Throughput: 0: 868.9. Samples: 178906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:17,606][00394] Avg episode reward: [(0, '23.863')] +[2023-02-27 12:26:21,609][47465] Updated weights for policy 0, policy_version 3111 (0.0025) +[2023-02-27 12:26:22,603][00394] Fps is (10 sec: 3687.9, 60 sec: 3481.6, 300 sec: 3205.6). Total num frames: 12742656. Throughput: 0: 907.6. Samples: 185440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:22,606][00394] Avg episode reward: [(0, '23.294')] +[2023-02-27 12:26:27,605][00394] Fps is (10 sec: 4095.3, 60 sec: 3549.8, 300 sec: 3224.5). Total num frames: 12763136. Throughput: 0: 908.2. Samples: 188754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:26:27,610][00394] Avg episode reward: [(0, '23.808')] +[2023-02-27 12:26:32,603][00394] Fps is (10 sec: 3686.3, 60 sec: 3549.8, 300 sec: 3225.6). Total num frames: 12779520. Throughput: 0: 864.6. Samples: 193236. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:26:32,606][00394] Avg episode reward: [(0, '24.100')] +[2023-02-27 12:26:33,812][47465] Updated weights for policy 0, policy_version 3121 (0.0012) +[2023-02-27 12:26:37,603][00394] Fps is (10 sec: 2867.8, 60 sec: 3481.6, 300 sec: 3209.9). Total num frames: 12791808. Throughput: 0: 871.0. Samples: 197574. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:26:37,606][00394] Avg episode reward: [(0, '25.155')] +[2023-02-27 12:26:42,603][00394] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3244.0). Total num frames: 12816384. Throughput: 0: 899.4. Samples: 200882. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:42,606][00394] Avg episode reward: [(0, '25.320')] +[2023-02-27 12:26:44,289][47465] Updated weights for policy 0, policy_version 3131 (0.0023) +[2023-02-27 12:26:47,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3550.2, 300 sec: 3260.7). Total num frames: 12836864. Throughput: 0: 905.3. Samples: 207508. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:47,610][00394] Avg episode reward: [(0, '26.214')] +[2023-02-27 12:26:47,626][47447] Saving new best policy, reward=26.214! +[2023-02-27 12:26:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3245.3). Total num frames: 12849152. Throughput: 0: 866.9. Samples: 211754. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:26:52,606][00394] Avg episode reward: [(0, '26.207')] +[2023-02-27 12:26:57,506][47465] Updated weights for policy 0, policy_version 3141 (0.0031) +[2023-02-27 12:26:57,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3245.9). Total num frames: 12865536. Throughput: 0: 868.7. Samples: 213816. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:26:57,606][00394] Avg episode reward: [(0, '25.927')] +[2023-02-27 12:27:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3261.6). Total num frames: 12886016. Throughput: 0: 905.7. Samples: 219662. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:27:02,606][00394] Avg episode reward: [(0, '25.486')] +[2023-02-27 12:27:06,758][47465] Updated weights for policy 0, policy_version 3151 (0.0016) +[2023-02-27 12:27:07,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 12906496. Throughput: 0: 908.4. Samples: 226320. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:07,610][00394] Avg episode reward: [(0, '24.556')] +[2023-02-27 12:27:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.4, 300 sec: 3276.8). Total num frames: 12922880. Throughput: 0: 881.5. Samples: 228418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:27:12,608][00394] Avg episode reward: [(0, '24.172')] +[2023-02-27 12:27:17,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3262.4). Total num frames: 12935168. Throughput: 0: 875.6. Samples: 232636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:27:17,609][00394] Avg episode reward: [(0, '22.179')] +[2023-02-27 12:27:19,937][47465] Updated weights for policy 0, policy_version 3161 (0.0022) +[2023-02-27 12:27:22,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3290.9). Total num frames: 12959744. Throughput: 0: 915.3. Samples: 238762. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:22,608][00394] Avg episode reward: [(0, '22.699')] +[2023-02-27 12:27:27,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3618.3, 300 sec: 3304.6). Total num frames: 12980224. Throughput: 0: 914.8. Samples: 242048. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:27,609][00394] Avg episode reward: [(0, '24.003')] +[2023-02-27 12:27:29,989][47465] Updated weights for policy 0, policy_version 3171 (0.0021) +[2023-02-27 12:27:32,610][00394] Fps is (10 sec: 3274.5, 60 sec: 3549.5, 300 sec: 3346.1). Total num frames: 12992512. Throughput: 0: 878.1. Samples: 247028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:27:32,613][00394] Avg episode reward: [(0, '24.111')] +[2023-02-27 12:27:37,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3401.8). Total num frames: 13008896. Throughput: 0: 877.9. Samples: 251258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:37,610][00394] Avg episode reward: [(0, '23.771')] +[2023-02-27 12:27:42,134][47465] Updated weights for policy 0, policy_version 3181 (0.0023) +[2023-02-27 12:27:42,603][00394] Fps is (10 sec: 3689.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 13029376. Throughput: 0: 902.3. Samples: 254420. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:42,610][00394] Avg episode reward: [(0, '24.889')] +[2023-02-27 12:27:47,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13049856. Throughput: 0: 918.4. Samples: 260988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:47,608][00394] Avg episode reward: [(0, '24.477')] +[2023-02-27 12:27:52,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 13066240. Throughput: 0: 876.1. Samples: 265744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:27:52,609][00394] Avg episode reward: [(0, '24.669')] +[2023-02-27 12:27:54,018][47465] Updated weights for policy 0, policy_version 3191 (0.0021) +[2023-02-27 12:27:57,604][00394] Fps is (10 sec: 2867.0, 60 sec: 3549.8, 300 sec: 3512.9). Total num frames: 13078528. Throughput: 0: 874.5. Samples: 267772. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-27 12:27:57,609][00394] Avg episode reward: [(0, '23.978')] +[2023-02-27 12:28:02,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3526.8). Total num frames: 13099008. Throughput: 0: 902.2. Samples: 273236. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:28:02,608][00394] Avg episode reward: [(0, '24.067')] +[2023-02-27 12:28:04,583][47465] Updated weights for policy 0, policy_version 3201 (0.0025) +[2023-02-27 12:28:07,603][00394] Fps is (10 sec: 4505.9, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 13123584. Throughput: 0: 916.8. Samples: 280018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:28:07,610][00394] Avg episode reward: [(0, '23.944')] +[2023-02-27 12:28:07,621][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003204_13123584.pth... +[2023-02-27 12:28:07,872][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002995_12267520.pth +[2023-02-27 12:28:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 13135872. Throughput: 0: 891.1. Samples: 282148. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:28:12,607][00394] Avg episode reward: [(0, '23.433')] +[2023-02-27 12:28:17,597][47465] Updated weights for policy 0, policy_version 3211 (0.0024) +[2023-02-27 12:28:17,603][00394] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 13148160. Throughput: 0: 873.9. Samples: 286346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:28:17,609][00394] Avg episode reward: [(0, '24.024')] +[2023-02-27 12:28:22,603][00394] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13172736. Throughput: 0: 907.8. Samples: 292110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:28:22,611][00394] Avg episode reward: [(0, '24.609')] +[2023-02-27 12:28:27,274][47465] Updated weights for policy 0, policy_version 3221 (0.0012) +[2023-02-27 12:28:27,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13193216. Throughput: 0: 910.8. Samples: 295406. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:28:27,610][00394] Avg episode reward: [(0, '24.000')] +[2023-02-27 12:28:32,603][00394] Fps is (10 sec: 3686.5, 60 sec: 3618.6, 300 sec: 3554.5). Total num frames: 13209600. Throughput: 0: 881.9. Samples: 300672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:28:32,607][00394] Avg episode reward: [(0, '23.523')] +[2023-02-27 12:28:37,604][00394] Fps is (10 sec: 2867.0, 60 sec: 3549.8, 300 sec: 3526.8). Total num frames: 13221888. Throughput: 0: 869.9. Samples: 304890. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:28:37,612][00394] Avg episode reward: [(0, '22.908')] +[2023-02-27 12:28:40,202][47465] Updated weights for policy 0, policy_version 3231 (0.0019) +[2023-02-27 12:28:42,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13242368. Throughput: 0: 886.3. Samples: 307654. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:28:42,614][00394] Avg episode reward: [(0, '21.978')] +[2023-02-27 12:28:47,603][00394] Fps is (10 sec: 4096.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13262848. Throughput: 0: 910.2. Samples: 314194. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:28:47,605][00394] Avg episode reward: [(0, '22.774')] +[2023-02-27 12:28:50,325][47465] Updated weights for policy 0, policy_version 3241 (0.0012) +[2023-02-27 12:28:52,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 13279232. Throughput: 0: 873.2. Samples: 319312. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:28:52,605][00394] Avg episode reward: [(0, '22.993')] +[2023-02-27 12:28:57,609][00394] Fps is (10 sec: 3274.9, 60 sec: 3617.8, 300 sec: 3540.5). Total num frames: 13295616. Throughput: 0: 871.3. Samples: 321362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:28:57,612][00394] Avg episode reward: [(0, '24.697')] +[2023-02-27 12:29:02,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.7). Total num frames: 13312000. Throughput: 0: 889.4. Samples: 326370. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:29:02,605][00394] Avg episode reward: [(0, '23.877')] +[2023-02-27 12:29:02,899][47465] Updated weights for policy 0, policy_version 3251 (0.0013) +[2023-02-27 12:29:07,603][00394] Fps is (10 sec: 4098.4, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 13336576. Throughput: 0: 909.1. Samples: 333020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:29:07,605][00394] Avg episode reward: [(0, '25.253')] +[2023-02-27 12:29:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 13348864. Throughput: 0: 895.2. Samples: 335690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:29:12,618][00394] Avg episode reward: [(0, '25.601')] +[2023-02-27 12:29:14,229][47465] Updated weights for policy 0, policy_version 3261 (0.0012) +[2023-02-27 12:29:17,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 13365248. Throughput: 0: 868.8. Samples: 339768. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:29:17,609][00394] Avg episode reward: [(0, '26.585')] +[2023-02-27 12:29:17,624][47447] Saving new best policy, reward=26.585! +[2023-02-27 12:29:22,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 13381632. Throughput: 0: 893.3. Samples: 345088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:29:22,614][00394] Avg episode reward: [(0, '27.246')] +[2023-02-27 12:29:22,616][47447] Saving new best policy, reward=27.246! +[2023-02-27 12:29:25,650][47465] Updated weights for policy 0, policy_version 3271 (0.0021) +[2023-02-27 12:29:27,603][00394] Fps is (10 sec: 4095.9, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13406208. Throughput: 0: 903.9. Samples: 348330. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:29:27,605][00394] Avg episode reward: [(0, '27.334')] +[2023-02-27 12:29:27,620][47447] Saving new best policy, reward=27.334! +[2023-02-27 12:29:32,604][00394] Fps is (10 sec: 4095.5, 60 sec: 3549.8, 300 sec: 3568.4). Total num frames: 13422592. Throughput: 0: 885.1. Samples: 354024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:29:32,611][00394] Avg episode reward: [(0, '27.011')] +[2023-02-27 12:29:37,604][00394] Fps is (10 sec: 2867.1, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13434880. Throughput: 0: 863.5. Samples: 358168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:29:37,609][00394] Avg episode reward: [(0, '26.117')] +[2023-02-27 12:29:38,222][47465] Updated weights for policy 0, policy_version 3281 (0.0013) +[2023-02-27 12:29:42,603][00394] Fps is (10 sec: 3277.1, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 13455360. Throughput: 0: 869.8. Samples: 360498. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:29:42,605][00394] Avg episode reward: [(0, '25.807')] +[2023-02-27 12:29:47,603][00394] Fps is (10 sec: 4096.2, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13475840. Throughput: 0: 904.3. Samples: 367062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:29:47,609][00394] Avg episode reward: [(0, '25.822')] +[2023-02-27 12:29:48,233][47465] Updated weights for policy 0, policy_version 3291 (0.0022) +[2023-02-27 12:29:52,603][00394] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13492224. Throughput: 0: 880.0. Samples: 372622. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:29:52,610][00394] Avg episode reward: [(0, '24.253')] +[2023-02-27 12:29:57,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3550.2, 300 sec: 3554.5). Total num frames: 13508608. Throughput: 0: 868.5. Samples: 374772. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:29:57,617][00394] Avg episode reward: [(0, '24.582')] +[2023-02-27 12:30:01,326][47465] Updated weights for policy 0, policy_version 3301 (0.0028) +[2023-02-27 12:30:02,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13524992. Throughput: 0: 878.5. Samples: 379302. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:30:02,611][00394] Avg episode reward: [(0, '24.098')] +[2023-02-27 12:30:07,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 13545472. Throughput: 0: 904.8. Samples: 385802. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:30:07,606][00394] Avg episode reward: [(0, '24.046')] +[2023-02-27 12:30:07,618][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003307_13545472.pth... +[2023-02-27 12:30:07,815][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003099_12693504.pth +[2023-02-27 12:30:11,136][47465] Updated weights for policy 0, policy_version 3311 (0.0012) +[2023-02-27 12:30:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13561856. Throughput: 0: 904.0. Samples: 389008. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:30:12,610][00394] Avg episode reward: [(0, '23.434')] +[2023-02-27 12:30:17,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13578240. Throughput: 0: 868.1. Samples: 393086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:30:17,611][00394] Avg episode reward: [(0, '22.720')] +[2023-02-27 12:30:22,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13594624. Throughput: 0: 878.6. Samples: 397704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:30:22,610][00394] Avg episode reward: [(0, '22.343')] +[2023-02-27 12:30:24,219][47465] Updated weights for policy 0, policy_version 3321 (0.0016) +[2023-02-27 12:30:27,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 13615104. Throughput: 0: 900.4. Samples: 401014. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:30:27,610][00394] Avg episode reward: [(0, '23.183')] +[2023-02-27 12:30:32,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 13635584. Throughput: 0: 899.3. Samples: 407530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:30:32,606][00394] Avg episode reward: [(0, '23.832')] +[2023-02-27 12:30:35,026][47465] Updated weights for policy 0, policy_version 3331 (0.0034) +[2023-02-27 12:30:37,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13647872. Throughput: 0: 866.8. Samples: 411628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:30:37,610][00394] Avg episode reward: [(0, '24.471')] +[2023-02-27 12:30:42,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3526.8). Total num frames: 13664256. Throughput: 0: 865.2. Samples: 413704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:30:42,606][00394] Avg episode reward: [(0, '25.189')] +[2023-02-27 12:30:46,933][47465] Updated weights for policy 0, policy_version 3341 (0.0027) +[2023-02-27 12:30:47,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 13684736. Throughput: 0: 896.8. Samples: 419656. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:30:47,606][00394] Avg episode reward: [(0, '25.283')] +[2023-02-27 12:30:52,609][00394] Fps is (10 sec: 4093.5, 60 sec: 3549.5, 300 sec: 3568.3). Total num frames: 13705216. Throughput: 0: 891.7. Samples: 425936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:30:52,611][00394] Avg episode reward: [(0, '24.673')] +[2023-02-27 12:30:57,605][00394] Fps is (10 sec: 3685.8, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 13721600. Throughput: 0: 868.1. Samples: 428072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:30:57,613][00394] Avg episode reward: [(0, '23.926')] +[2023-02-27 12:30:58,979][47465] Updated weights for policy 0, policy_version 3351 (0.0026) +[2023-02-27 12:31:02,603][00394] Fps is (10 sec: 2869.0, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 13733888. Throughput: 0: 872.3. Samples: 432338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:31:02,610][00394] Avg episode reward: [(0, '22.356')] +[2023-02-27 12:31:07,603][00394] Fps is (10 sec: 3686.8, 60 sec: 3549.8, 300 sec: 3568.4). Total num frames: 13758464. Throughput: 0: 909.1. Samples: 438614. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:31:07,605][00394] Avg episode reward: [(0, '20.339')] +[2023-02-27 12:31:09,188][47465] Updated weights for policy 0, policy_version 3361 (0.0021) +[2023-02-27 12:31:12,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 13778944. Throughput: 0: 910.5. Samples: 441986. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:31:12,610][00394] Avg episode reward: [(0, '20.288')] +[2023-02-27 12:31:17,608][00394] Fps is (10 sec: 3275.2, 60 sec: 3549.6, 300 sec: 3554.4). Total num frames: 13791232. Throughput: 0: 878.3. Samples: 447060. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:31:17,618][00394] Avg episode reward: [(0, '19.104')] +[2023-02-27 12:31:22,150][47465] Updated weights for policy 0, policy_version 3371 (0.0024) +[2023-02-27 12:31:22,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 13807616. Throughput: 0: 876.7. Samples: 451078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:31:22,609][00394] Avg episode reward: [(0, '20.681')] +[2023-02-27 12:31:27,603][00394] Fps is (10 sec: 3688.2, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 13828096. Throughput: 0: 900.6. Samples: 454232. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:31:27,611][00394] Avg episode reward: [(0, '22.663')] +[2023-02-27 12:31:31,398][47465] Updated weights for policy 0, policy_version 3381 (0.0012) +[2023-02-27 12:31:32,612][00394] Fps is (10 sec: 4505.1, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 13852672. Throughput: 0: 919.7. Samples: 461042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:31:32,615][00394] Avg episode reward: [(0, '23.490')] +[2023-02-27 12:31:37,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 13864960. Throughput: 0: 881.3. Samples: 465588. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:31:37,608][00394] Avg episode reward: [(0, '24.674')] +[2023-02-27 12:31:42,603][00394] Fps is (10 sec: 2457.9, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 13877248. Throughput: 0: 878.8. Samples: 467618. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:31:42,611][00394] Avg episode reward: [(0, '25.741')] +[2023-02-27 12:31:44,635][47465] Updated weights for policy 0, policy_version 3391 (0.0018) +[2023-02-27 12:31:47,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 13901824. Throughput: 0: 907.6. Samples: 473182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:31:47,610][00394] Avg episode reward: [(0, '27.413')] +[2023-02-27 12:31:47,625][47447] Saving new best policy, reward=27.413! +[2023-02-27 12:31:52,603][00394] Fps is (10 sec: 4505.7, 60 sec: 3618.5, 300 sec: 3582.3). Total num frames: 13922304. Throughput: 0: 912.6. Samples: 479682. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:31:52,609][00394] Avg episode reward: [(0, '28.543')] +[2023-02-27 12:31:52,613][47447] Saving new best policy, reward=28.543! +[2023-02-27 12:31:55,373][47465] Updated weights for policy 0, policy_version 3401 (0.0016) +[2023-02-27 12:31:57,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3554.5). Total num frames: 13934592. Throughput: 0: 884.5. Samples: 481788. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:31:57,608][00394] Avg episode reward: [(0, '26.938')] +[2023-02-27 12:32:02,604][00394] Fps is (10 sec: 2867.0, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 13950976. Throughput: 0: 864.5. Samples: 485960. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:02,615][00394] Avg episode reward: [(0, '26.518')] +[2023-02-27 12:32:07,580][47465] Updated weights for policy 0, policy_version 3411 (0.0014) +[2023-02-27 12:32:07,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 13971456. Throughput: 0: 902.1. Samples: 491672. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:07,607][00394] Avg episode reward: [(0, '26.794')] +[2023-02-27 12:32:07,622][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003411_13971456.pth... +[2023-02-27 12:32:07,794][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003204_13123584.pth +[2023-02-27 12:32:12,603][00394] Fps is (10 sec: 4096.3, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 13991936. Throughput: 0: 902.7. Samples: 494854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:32:12,611][00394] Avg episode reward: [(0, '25.678')] +[2023-02-27 12:32:17,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3550.2, 300 sec: 3540.6). Total num frames: 14004224. Throughput: 0: 871.9. Samples: 500276. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:32:17,609][00394] Avg episode reward: [(0, '24.496')] +[2023-02-27 12:32:19,251][47465] Updated weights for policy 0, policy_version 3421 (0.0020) +[2023-02-27 12:32:22,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 14020608. Throughput: 0: 862.7. Samples: 504408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:22,610][00394] Avg episode reward: [(0, '23.926')] +[2023-02-27 12:32:27,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.6). Total num frames: 14041088. Throughput: 0: 874.0. Samples: 506946. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:32:27,608][00394] Avg episode reward: [(0, '24.789')] +[2023-02-27 12:32:30,463][47465] Updated weights for policy 0, policy_version 3431 (0.0021) +[2023-02-27 12:32:32,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3481.7, 300 sec: 3568.4). Total num frames: 14061568. Throughput: 0: 898.0. Samples: 513590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:32:32,609][00394] Avg episode reward: [(0, '24.318')] +[2023-02-27 12:32:37,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14077952. Throughput: 0: 869.1. Samples: 518790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:37,611][00394] Avg episode reward: [(0, '24.150')] +[2023-02-27 12:32:42,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 14090240. Throughput: 0: 868.0. Samples: 520846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:32:42,607][00394] Avg episode reward: [(0, '24.832')] +[2023-02-27 12:32:43,163][47465] Updated weights for policy 0, policy_version 3441 (0.0035) +[2023-02-27 12:32:47,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 14110720. Throughput: 0: 882.0. Samples: 525650. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:47,605][00394] Avg episode reward: [(0, '25.614')] +[2023-02-27 12:32:52,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 14131200. Throughput: 0: 903.3. Samples: 532322. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:32:52,610][00394] Avg episode reward: [(0, '26.365')] +[2023-02-27 12:32:52,875][47465] Updated weights for policy 0, policy_version 3451 (0.0014) +[2023-02-27 12:32:57,605][00394] Fps is (10 sec: 3685.7, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 14147584. Throughput: 0: 899.0. Samples: 535310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:32:57,610][00394] Avg episode reward: [(0, '24.379')] +[2023-02-27 12:33:02,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 14163968. Throughput: 0: 870.8. Samples: 539462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:33:02,607][00394] Avg episode reward: [(0, '25.694')] +[2023-02-27 12:33:06,043][47465] Updated weights for policy 0, policy_version 3461 (0.0012) +[2023-02-27 12:33:07,603][00394] Fps is (10 sec: 3277.4, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 14180352. Throughput: 0: 891.3. Samples: 544518. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:33:07,605][00394] Avg episode reward: [(0, '25.695')] +[2023-02-27 12:33:12,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 14204928. Throughput: 0: 908.0. Samples: 547804. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:33:12,606][00394] Avg episode reward: [(0, '24.153')] +[2023-02-27 12:33:15,466][47465] Updated weights for policy 0, policy_version 3471 (0.0012) +[2023-02-27 12:33:17,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 14221312. Throughput: 0: 896.8. Samples: 553948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:33:17,609][00394] Avg episode reward: [(0, '22.463')] +[2023-02-27 12:33:22,606][00394] Fps is (10 sec: 2866.3, 60 sec: 3549.7, 300 sec: 3526.7). Total num frames: 14233600. Throughput: 0: 874.5. Samples: 558144. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:33:22,609][00394] Avg episode reward: [(0, '22.374')] +[2023-02-27 12:33:27,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 14254080. Throughput: 0: 873.8. Samples: 560168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:33:27,606][00394] Avg episode reward: [(0, '24.747')] +[2023-02-27 12:33:28,525][47465] Updated weights for policy 0, policy_version 3481 (0.0026) +[2023-02-27 12:33:32,603][00394] Fps is (10 sec: 4097.3, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14274560. Throughput: 0: 911.2. Samples: 566652. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:33:32,610][00394] Avg episode reward: [(0, '23.463')] +[2023-02-27 12:33:37,603][00394] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 14295040. Throughput: 0: 896.8. Samples: 572678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:33:37,608][00394] Avg episode reward: [(0, '23.962')] +[2023-02-27 12:33:38,951][47465] Updated weights for policy 0, policy_version 3491 (0.0020) +[2023-02-27 12:33:42,608][00394] Fps is (10 sec: 3275.2, 60 sec: 3617.8, 300 sec: 3540.6). Total num frames: 14307328. Throughput: 0: 875.9. Samples: 574728. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:33:42,611][00394] Avg episode reward: [(0, '23.450')] +[2023-02-27 12:33:47,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 14323712. Throughput: 0: 880.4. Samples: 579080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:33:47,611][00394] Avg episode reward: [(0, '23.769')] +[2023-02-27 12:33:50,759][47465] Updated weights for policy 0, policy_version 3501 (0.0016) +[2023-02-27 12:33:52,603][00394] Fps is (10 sec: 3688.2, 60 sec: 3549.9, 300 sec: 3554.6). Total num frames: 14344192. Throughput: 0: 916.1. Samples: 585742. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:33:52,611][00394] Avg episode reward: [(0, '23.897')] +[2023-02-27 12:33:57,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.3, 300 sec: 3568.4). Total num frames: 14364672. Throughput: 0: 916.7. Samples: 589056. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:33:57,610][00394] Avg episode reward: [(0, '23.325')] +[2023-02-27 12:34:02,579][47465] Updated weights for policy 0, policy_version 3511 (0.0019) +[2023-02-27 12:34:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 14381056. Throughput: 0: 876.8. Samples: 593404. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:34:02,610][00394] Avg episode reward: [(0, '23.122')] +[2023-02-27 12:34:07,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 14397440. Throughput: 0: 885.5. Samples: 597990. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:34:07,606][00394] Avg episode reward: [(0, '22.917')] +[2023-02-27 12:34:07,620][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003515_14397440.pth... +[2023-02-27 12:34:07,798][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003307_13545472.pth +[2023-02-27 12:34:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14417920. Throughput: 0: 913.7. Samples: 601284. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:34:12,606][00394] Avg episode reward: [(0, '22.576')] +[2023-02-27 12:34:13,319][47465] Updated weights for policy 0, policy_version 3521 (0.0033) +[2023-02-27 12:34:17,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 14438400. Throughput: 0: 917.7. Samples: 607948. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:34:17,608][00394] Avg episode reward: [(0, '22.725')] +[2023-02-27 12:34:22,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3540.6). Total num frames: 14450688. Throughput: 0: 876.4. Samples: 612118. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:34:22,606][00394] Avg episode reward: [(0, '22.287')] +[2023-02-27 12:34:26,489][47465] Updated weights for policy 0, policy_version 3531 (0.0044) +[2023-02-27 12:34:27,603][00394] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 14462976. Throughput: 0: 875.4. Samples: 614118. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:34:27,612][00394] Avg episode reward: [(0, '23.469')] +[2023-02-27 12:34:32,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14487552. Throughput: 0: 906.6. Samples: 619876. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:34:32,610][00394] Avg episode reward: [(0, '23.504')] +[2023-02-27 12:34:35,985][47465] Updated weights for policy 0, policy_version 3541 (0.0025) +[2023-02-27 12:34:37,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14508032. Throughput: 0: 903.2. Samples: 626384. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:34:37,612][00394] Avg episode reward: [(0, '23.613')] +[2023-02-27 12:34:42,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.4, 300 sec: 3554.5). Total num frames: 14524416. Throughput: 0: 875.9. Samples: 628472. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:34:42,608][00394] Avg episode reward: [(0, '23.367')] +[2023-02-27 12:34:47,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 14536704. Throughput: 0: 870.8. Samples: 632588. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:34:47,611][00394] Avg episode reward: [(0, '22.671')] +[2023-02-27 12:34:49,233][47465] Updated weights for policy 0, policy_version 3551 (0.0017) +[2023-02-27 12:34:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14557184. Throughput: 0: 902.2. Samples: 638590. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:34:52,612][00394] Avg episode reward: [(0, '22.909')] +[2023-02-27 12:34:57,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14577664. Throughput: 0: 902.7. Samples: 641904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:34:57,605][00394] Avg episode reward: [(0, '22.056')] +[2023-02-27 12:34:58,983][47465] Updated weights for policy 0, policy_version 3561 (0.0012) +[2023-02-27 12:35:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14594048. Throughput: 0: 870.8. Samples: 647134. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:35:02,611][00394] Avg episode reward: [(0, '22.705')] +[2023-02-27 12:35:07,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 14606336. Throughput: 0: 870.6. Samples: 651294. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:35:07,609][00394] Avg episode reward: [(0, '23.382')] +[2023-02-27 12:35:11,518][47465] Updated weights for policy 0, policy_version 3571 (0.0017) +[2023-02-27 12:35:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14630912. Throughput: 0: 895.3. Samples: 654406. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:35:12,606][00394] Avg episode reward: [(0, '24.156')] +[2023-02-27 12:35:17,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 14651392. Throughput: 0: 913.2. Samples: 660970. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:35:17,608][00394] Avg episode reward: [(0, '24.364')] +[2023-02-27 12:35:22,603][00394] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 14663680. Throughput: 0: 877.0. Samples: 665848. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:35:22,610][00394] Avg episode reward: [(0, '24.373')] +[2023-02-27 12:35:22,750][47465] Updated weights for policy 0, policy_version 3581 (0.0014) +[2023-02-27 12:35:27,603][00394] Fps is (10 sec: 2867.1, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 14680064. Throughput: 0: 877.1. Samples: 667940. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:35:27,614][00394] Avg episode reward: [(0, '25.920')] +[2023-02-27 12:35:32,603][00394] Fps is (10 sec: 3686.6, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14700544. Throughput: 0: 907.7. Samples: 673436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:35:32,606][00394] Avg episode reward: [(0, '25.193')] +[2023-02-27 12:35:33,984][47465] Updated weights for policy 0, policy_version 3591 (0.0020) +[2023-02-27 12:35:37,603][00394] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 14721024. Throughput: 0: 919.3. Samples: 679958. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:35:37,606][00394] Avg episode reward: [(0, '23.773')] +[2023-02-27 12:35:42,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14737408. Throughput: 0: 900.9. Samples: 682446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:35:42,611][00394] Avg episode reward: [(0, '22.873')] +[2023-02-27 12:35:46,094][47465] Updated weights for policy 0, policy_version 3601 (0.0015) +[2023-02-27 12:35:47,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.7). Total num frames: 14749696. Throughput: 0: 876.0. Samples: 686554. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:35:47,616][00394] Avg episode reward: [(0, '23.253')] +[2023-02-27 12:35:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14770176. Throughput: 0: 908.0. Samples: 692152. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:35:52,605][00394] Avg episode reward: [(0, '23.321')] +[2023-02-27 12:35:56,514][47465] Updated weights for policy 0, policy_version 3611 (0.0024) +[2023-02-27 12:35:57,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 14794752. Throughput: 0: 913.0. Samples: 695492. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:35:57,606][00394] Avg episode reward: [(0, '21.411')] +[2023-02-27 12:36:02,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14807040. Throughput: 0: 888.0. Samples: 700932. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:02,611][00394] Avg episode reward: [(0, '22.540')] +[2023-02-27 12:36:07,604][00394] Fps is (10 sec: 2867.0, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 14823424. Throughput: 0: 871.9. Samples: 705084. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:07,606][00394] Avg episode reward: [(0, '23.299')] +[2023-02-27 12:36:07,625][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003619_14823424.pth... +[2023-02-27 12:36:07,841][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003411_13971456.pth +[2023-02-27 12:36:09,817][47465] Updated weights for policy 0, policy_version 3621 (0.0023) +[2023-02-27 12:36:12,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 14843904. Throughput: 0: 880.3. Samples: 707554. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:36:12,606][00394] Avg episode reward: [(0, '22.745')] +[2023-02-27 12:36:17,603][00394] Fps is (10 sec: 4096.2, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 14864384. Throughput: 0: 907.0. Samples: 714252. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:17,611][00394] Avg episode reward: [(0, '22.282')] +[2023-02-27 12:36:19,116][47465] Updated weights for policy 0, policy_version 3631 (0.0025) +[2023-02-27 12:36:22,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 14880768. Throughput: 0: 878.8. Samples: 719502. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:22,613][00394] Avg episode reward: [(0, '23.255')] +[2023-02-27 12:36:27,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 14893056. Throughput: 0: 870.1. Samples: 721600. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:27,605][00394] Avg episode reward: [(0, '23.309')] +[2023-02-27 12:36:32,292][47465] Updated weights for policy 0, policy_version 3641 (0.0031) +[2023-02-27 12:36:32,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14913536. Throughput: 0: 885.7. Samples: 726410. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:32,616][00394] Avg episode reward: [(0, '23.409')] +[2023-02-27 12:36:37,603][00394] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 14934016. Throughput: 0: 906.8. Samples: 732960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:36:37,611][00394] Avg episode reward: [(0, '23.854')] +[2023-02-27 12:36:42,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14950400. Throughput: 0: 901.0. Samples: 736038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:42,610][00394] Avg episode reward: [(0, '25.768')] +[2023-02-27 12:36:42,847][47465] Updated weights for policy 0, policy_version 3651 (0.0022) +[2023-02-27 12:36:47,606][00394] Fps is (10 sec: 3275.8, 60 sec: 3617.9, 300 sec: 3540.6). Total num frames: 14966784. Throughput: 0: 872.8. Samples: 740210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:36:47,612][00394] Avg episode reward: [(0, '26.055')] +[2023-02-27 12:36:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 14983168. Throughput: 0: 889.3. Samples: 745100. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:36:52,613][00394] Avg episode reward: [(0, '27.087')] +[2023-02-27 12:36:54,938][47465] Updated weights for policy 0, policy_version 3661 (0.0032) +[2023-02-27 12:36:57,607][00394] Fps is (10 sec: 3686.1, 60 sec: 3481.4, 300 sec: 3568.3). Total num frames: 15003648. Throughput: 0: 906.1. Samples: 748334. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:36:57,610][00394] Avg episode reward: [(0, '27.212')] +[2023-02-27 12:37:02,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 15024128. Throughput: 0: 891.7. Samples: 754376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:37:02,606][00394] Avg episode reward: [(0, '26.920')] +[2023-02-27 12:37:06,724][47465] Updated weights for policy 0, policy_version 3671 (0.0020) +[2023-02-27 12:37:07,603][00394] Fps is (10 sec: 3278.1, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15036416. Throughput: 0: 868.0. Samples: 758560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:37:07,606][00394] Avg episode reward: [(0, '27.278')] +[2023-02-27 12:37:12,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 15052800. Throughput: 0: 866.9. Samples: 760612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:37:12,606][00394] Avg episode reward: [(0, '26.393')] +[2023-02-27 12:37:17,538][47465] Updated weights for policy 0, policy_version 3681 (0.0014) +[2023-02-27 12:37:17,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15077376. Throughput: 0: 904.0. Samples: 767092. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:37:17,610][00394] Avg episode reward: [(0, '26.899')] +[2023-02-27 12:37:22,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15093760. Throughput: 0: 891.2. Samples: 773062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:37:22,607][00394] Avg episode reward: [(0, '26.427')] +[2023-02-27 12:37:27,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15106048. Throughput: 0: 867.3. Samples: 775068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:37:27,606][00394] Avg episode reward: [(0, '26.840')] +[2023-02-27 12:37:30,628][47465] Updated weights for policy 0, policy_version 3691 (0.0018) +[2023-02-27 12:37:32,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15126528. Throughput: 0: 869.2. Samples: 779320. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-27 12:37:32,606][00394] Avg episode reward: [(0, '25.015')] +[2023-02-27 12:37:37,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15147008. Throughput: 0: 908.7. Samples: 785992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:37:37,606][00394] Avg episode reward: [(0, '25.030')] +[2023-02-27 12:37:39,826][47465] Updated weights for policy 0, policy_version 3701 (0.0013) +[2023-02-27 12:37:42,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 15167488. Throughput: 0: 912.4. Samples: 789388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:37:42,607][00394] Avg episode reward: [(0, '23.222')] +[2023-02-27 12:37:47,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3550.1, 300 sec: 3554.5). Total num frames: 15179776. Throughput: 0: 875.3. Samples: 793766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:37:47,607][00394] Avg episode reward: [(0, '22.401')] +[2023-02-27 12:37:52,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15196160. Throughput: 0: 881.0. Samples: 798204. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:37:52,608][00394] Avg episode reward: [(0, '23.037')] +[2023-02-27 12:37:53,191][47465] Updated weights for policy 0, policy_version 3711 (0.0020) +[2023-02-27 12:37:57,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3550.1, 300 sec: 3568.4). Total num frames: 15216640. Throughput: 0: 907.1. Samples: 801432. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:37:57,606][00394] Avg episode reward: [(0, '23.523')] +[2023-02-27 12:38:02,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15237120. Throughput: 0: 908.3. Samples: 807966. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:38:02,610][00394] Avg episode reward: [(0, '24.048')] +[2023-02-27 12:38:03,347][47465] Updated weights for policy 0, policy_version 3721 (0.0014) +[2023-02-27 12:38:07,603][00394] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15249408. Throughput: 0: 867.7. Samples: 812108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:38:07,620][00394] Avg episode reward: [(0, '24.317')] +[2023-02-27 12:38:07,645][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003723_15249408.pth... +[2023-02-27 12:38:07,947][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003515_14397440.pth +[2023-02-27 12:38:12,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15265792. Throughput: 0: 867.9. Samples: 814124. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:38:12,606][00394] Avg episode reward: [(0, '25.960')] +[2023-02-27 12:38:15,748][47465] Updated weights for policy 0, policy_version 3731 (0.0041) +[2023-02-27 12:38:17,603][00394] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 15286272. Throughput: 0: 909.0. Samples: 820224. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:38:17,606][00394] Avg episode reward: [(0, '26.885')] +[2023-02-27 12:38:22,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15306752. Throughput: 0: 901.3. Samples: 826550. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:38:22,614][00394] Avg episode reward: [(0, '26.914')] +[2023-02-27 12:38:27,041][47465] Updated weights for policy 0, policy_version 3741 (0.0016) +[2023-02-27 12:38:27,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 15323136. Throughput: 0: 871.2. Samples: 828592. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:38:27,610][00394] Avg episode reward: [(0, '26.841')] +[2023-02-27 12:38:32,603][00394] Fps is (10 sec: 2867.1, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 15335424. Throughput: 0: 865.7. Samples: 832724. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:38:32,607][00394] Avg episode reward: [(0, '26.532')] +[2023-02-27 12:38:37,603][00394] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15360000. Throughput: 0: 908.4. Samples: 839082. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:38:37,610][00394] Avg episode reward: [(0, '27.558')] +[2023-02-27 12:38:38,340][47465] Updated weights for policy 0, policy_version 3751 (0.0024) +[2023-02-27 12:38:42,603][00394] Fps is (10 sec: 4505.8, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15380480. Throughput: 0: 910.8. Samples: 842418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:38:42,611][00394] Avg episode reward: [(0, '26.173')] +[2023-02-27 12:38:47,606][00394] Fps is (10 sec: 3275.9, 60 sec: 3549.7, 300 sec: 3554.5). Total num frames: 15392768. Throughput: 0: 872.8. Samples: 847244. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:38:47,613][00394] Avg episode reward: [(0, '26.044')] +[2023-02-27 12:38:50,850][47465] Updated weights for policy 0, policy_version 3761 (0.0023) +[2023-02-27 12:38:52,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15409152. Throughput: 0: 873.7. Samples: 851426. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:38:52,612][00394] Avg episode reward: [(0, '26.236')] +[2023-02-27 12:38:57,603][00394] Fps is (10 sec: 3687.5, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15429632. Throughput: 0: 900.7. Samples: 854656. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:38:57,605][00394] Avg episode reward: [(0, '27.420')] +[2023-02-27 12:39:00,649][47465] Updated weights for policy 0, policy_version 3771 (0.0012) +[2023-02-27 12:39:02,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15450112. Throughput: 0: 910.2. Samples: 861182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:39:02,605][00394] Avg episode reward: [(0, '26.789')] +[2023-02-27 12:39:07,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 15466496. Throughput: 0: 869.7. Samples: 865688. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:39:07,606][00394] Avg episode reward: [(0, '27.136')] +[2023-02-27 12:39:12,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 15478784. Throughput: 0: 869.7. Samples: 867728. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:39:12,606][00394] Avg episode reward: [(0, '26.613')] +[2023-02-27 12:39:14,085][47465] Updated weights for policy 0, policy_version 3781 (0.0024) +[2023-02-27 12:39:17,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15499264. Throughput: 0: 900.5. Samples: 873248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:39:17,610][00394] Avg episode reward: [(0, '28.290')] +[2023-02-27 12:39:22,604][00394] Fps is (10 sec: 4505.4, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 15523840. Throughput: 0: 907.3. Samples: 879910. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:39:22,614][00394] Avg episode reward: [(0, '28.558')] +[2023-02-27 12:39:22,617][47447] Saving new best policy, reward=28.558! +[2023-02-27 12:39:23,974][47465] Updated weights for policy 0, policy_version 3791 (0.0014) +[2023-02-27 12:39:27,603][00394] Fps is (10 sec: 3686.3, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 15536128. Throughput: 0: 879.9. Samples: 882016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:39:27,606][00394] Avg episode reward: [(0, '28.321')] +[2023-02-27 12:39:32,604][00394] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 15548416. Throughput: 0: 863.6. Samples: 886102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:39:32,611][00394] Avg episode reward: [(0, '27.980')] +[2023-02-27 12:39:36,937][47465] Updated weights for policy 0, policy_version 3801 (0.0017) +[2023-02-27 12:39:37,603][00394] Fps is (10 sec: 3276.9, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 15568896. Throughput: 0: 897.7. Samples: 891824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:39:37,606][00394] Avg episode reward: [(0, '28.866')] +[2023-02-27 12:39:37,615][47447] Saving new best policy, reward=28.866! +[2023-02-27 12:39:42,603][00394] Fps is (10 sec: 4505.9, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15593472. Throughput: 0: 898.7. Samples: 895098. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:39:42,605][00394] Avg episode reward: [(0, '29.020')] +[2023-02-27 12:39:42,612][47447] Saving new best policy, reward=29.020! +[2023-02-27 12:39:47,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3550.0, 300 sec: 3554.5). Total num frames: 15605760. Throughput: 0: 868.7. Samples: 900272. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:39:47,611][00394] Avg episode reward: [(0, '28.330')] +[2023-02-27 12:39:47,958][47465] Updated weights for policy 0, policy_version 3811 (0.0021) +[2023-02-27 12:39:52,603][00394] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 15618048. Throughput: 0: 859.7. Samples: 904374. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-27 12:39:52,612][00394] Avg episode reward: [(0, '28.337')] +[2023-02-27 12:39:57,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 15638528. Throughput: 0: 873.9. Samples: 907054. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-27 12:39:57,611][00394] Avg episode reward: [(0, '27.669')] +[2023-02-27 12:39:59,683][47465] Updated weights for policy 0, policy_version 3821 (0.0012) +[2023-02-27 12:40:02,603][00394] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 15663104. Throughput: 0: 898.9. Samples: 913700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:40:02,608][00394] Avg episode reward: [(0, '26.112')] +[2023-02-27 12:40:07,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 15675392. Throughput: 0: 862.1. Samples: 918702. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:40:07,609][00394] Avg episode reward: [(0, '24.931')] +[2023-02-27 12:40:07,625][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003827_15675392.pth... +[2023-02-27 12:40:07,897][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003619_14823424.pth +[2023-02-27 12:40:12,491][47465] Updated weights for policy 0, policy_version 3831 (0.0012) +[2023-02-27 12:40:12,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 15691776. Throughput: 0: 861.2. Samples: 920770. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:40:12,606][00394] Avg episode reward: [(0, '24.978')] +[2023-02-27 12:40:17,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15712256. Throughput: 0: 881.5. Samples: 925770. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:40:17,606][00394] Avg episode reward: [(0, '25.431')] +[2023-02-27 12:40:22,055][47465] Updated weights for policy 0, policy_version 3841 (0.0018) +[2023-02-27 12:40:22,603][00394] Fps is (10 sec: 4095.9, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 15732736. Throughput: 0: 901.5. Samples: 932394. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:40:22,607][00394] Avg episode reward: [(0, '25.530')] +[2023-02-27 12:40:27,603][00394] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 15749120. Throughput: 0: 893.5. Samples: 935304. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:40:27,606][00394] Avg episode reward: [(0, '26.860')] +[2023-02-27 12:40:32,603][00394] Fps is (10 sec: 2867.3, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 15761408. Throughput: 0: 871.5. Samples: 939490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-27 12:40:32,609][00394] Avg episode reward: [(0, '27.033')] +[2023-02-27 12:40:35,258][47465] Updated weights for policy 0, policy_version 3851 (0.0017) +[2023-02-27 12:40:37,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15781888. Throughput: 0: 893.6. Samples: 944586. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:40:37,605][00394] Avg episode reward: [(0, '26.288')] +[2023-02-27 12:40:42,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 15802368. Throughput: 0: 907.9. Samples: 947908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:40:42,606][00394] Avg episode reward: [(0, '27.333')] +[2023-02-27 12:40:44,509][47465] Updated weights for policy 0, policy_version 3861 (0.0012) +[2023-02-27 12:40:47,606][00394] Fps is (10 sec: 4094.7, 60 sec: 3617.9, 300 sec: 3568.3). Total num frames: 15822848. Throughput: 0: 896.0. Samples: 954022. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:40:47,609][00394] Avg episode reward: [(0, '27.727')] +[2023-02-27 12:40:52,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 15835136. Throughput: 0: 876.2. Samples: 958130. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:40:52,612][00394] Avg episode reward: [(0, '28.085')] +[2023-02-27 12:40:57,603][00394] Fps is (10 sec: 2868.1, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15851520. Throughput: 0: 877.0. Samples: 960234. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-27 12:40:57,605][00394] Avg episode reward: [(0, '26.525')] +[2023-02-27 12:40:57,743][47465] Updated weights for policy 0, policy_version 3871 (0.0025) +[2023-02-27 12:41:02,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15876096. Throughput: 0: 915.0. Samples: 966946. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:41:02,611][00394] Avg episode reward: [(0, '25.830')] +[2023-02-27 12:41:07,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 15892480. Throughput: 0: 896.6. Samples: 972742. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:41:07,616][00394] Avg episode reward: [(0, '26.253')] +[2023-02-27 12:41:07,952][47465] Updated weights for policy 0, policy_version 3881 (0.0016) +[2023-02-27 12:41:12,604][00394] Fps is (10 sec: 3276.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 15908864. Throughput: 0: 877.5. Samples: 974792. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:41:12,606][00394] Avg episode reward: [(0, '25.187')] +[2023-02-27 12:41:17,603][00394] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15925248. Throughput: 0: 884.4. Samples: 979288. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-27 12:41:17,609][00394] Avg episode reward: [(0, '25.891')] +[2023-02-27 12:41:20,144][47465] Updated weights for policy 0, policy_version 3891 (0.0031) +[2023-02-27 12:41:22,603][00394] Fps is (10 sec: 3686.9, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 15945728. Throughput: 0: 916.6. Samples: 985834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-27 12:41:22,606][00394] Avg episode reward: [(0, '25.764')] +[2023-02-27 12:41:27,603][00394] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 15966208. Throughput: 0: 916.2. Samples: 989136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-27 12:41:27,607][00394] Avg episode reward: [(0, '26.755')] +[2023-02-27 12:41:31,716][47465] Updated weights for policy 0, policy_version 3901 (0.0016) +[2023-02-27 12:41:32,607][00394] Fps is (10 sec: 3275.5, 60 sec: 3617.9, 300 sec: 3540.6). Total num frames: 15978496. Throughput: 0: 875.5. Samples: 993420. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:41:32,615][00394] Avg episode reward: [(0, '26.698')] +[2023-02-27 12:41:37,603][00394] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 15994880. Throughput: 0: 890.3. Samples: 998194. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-27 12:41:37,606][00394] Avg episode reward: [(0, '26.868')] +[2023-02-27 12:41:39,710][47447] Stopping Batcher_0... +[2023-02-27 12:41:39,711][47447] Loop batcher_evt_loop terminating... +[2023-02-27 12:41:39,714][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2023-02-27 12:41:39,711][00394] Component Batcher_0 stopped! +[2023-02-27 12:41:39,805][47465] Weights refcount: 2 0 +[2023-02-27 12:41:39,841][00394] Component InferenceWorker_p0-w0 stopped! +[2023-02-27 12:41:39,843][47465] Stopping InferenceWorker_p0-w0... +[2023-02-27 12:41:39,844][47465] Loop inference_proc0-0_evt_loop terminating... +[2023-02-27 12:41:39,920][47447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003723_15249408.pth +[2023-02-27 12:41:39,925][47447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2023-02-27 12:41:40,066][47447] Stopping LearnerWorker_p0... +[2023-02-27 12:41:40,068][47447] Loop learner_proc0_evt_loop terminating... +[2023-02-27 12:41:40,066][00394] Component LearnerWorker_p0 stopped! +[2023-02-27 12:41:40,260][47476] Stopping RolloutWorker_w3... +[2023-02-27 12:41:40,261][47466] Stopping RolloutWorker_w1... +[2023-02-27 12:41:40,259][00394] Component RolloutWorker_w0 stopped! +[2023-02-27 12:41:40,267][47488] Stopping RolloutWorker_w5... +[2023-02-27 12:41:40,268][47476] Loop rollout_proc3_evt_loop terminating... +[2023-02-27 12:41:40,269][47466] Loop rollout_proc1_evt_loop terminating... +[2023-02-27 12:41:40,268][00394] Component RolloutWorker_w3 stopped! +[2023-02-27 12:41:40,268][47488] Loop rollout_proc5_evt_loop terminating... +[2023-02-27 12:41:40,271][00394] Component RolloutWorker_w1 stopped! +[2023-02-27 12:41:40,277][47486] Stopping RolloutWorker_w7... +[2023-02-27 12:41:40,278][47486] Loop rollout_proc7_evt_loop terminating... +[2023-02-27 12:41:40,275][00394] Component RolloutWorker_w5 stopped! +[2023-02-27 12:41:40,279][00394] Component RolloutWorker_w7 stopped! +[2023-02-27 12:41:40,280][47470] Stopping RolloutWorker_w0... +[2023-02-27 12:41:40,302][47470] Loop rollout_proc0_evt_loop terminating... +[2023-02-27 12:41:40,307][00394] Component RolloutWorker_w2 stopped! +[2023-02-27 12:41:40,308][47467] Stopping RolloutWorker_w2... +[2023-02-27 12:41:40,309][47467] Loop rollout_proc2_evt_loop terminating... +[2023-02-27 12:41:40,317][00394] Component RolloutWorker_w6 stopped! +[2023-02-27 12:41:40,321][47494] Stopping RolloutWorker_w6... +[2023-02-27 12:41:40,326][47480] Stopping RolloutWorker_w4... +[2023-02-27 12:41:40,326][00394] Component RolloutWorker_w4 stopped! +[2023-02-27 12:41:40,328][00394] Waiting for process learner_proc0 to stop... +[2023-02-27 12:41:40,342][47480] Loop rollout_proc4_evt_loop terminating... +[2023-02-27 12:41:40,339][47494] Loop rollout_proc6_evt_loop terminating... +[2023-02-27 12:41:43,575][00394] Waiting for process inference_proc0-0 to join... +[2023-02-27 12:41:43,649][00394] Waiting for process rollout_proc0 to join... +[2023-02-27 12:41:43,651][00394] Waiting for process rollout_proc1 to join... +[2023-02-27 12:41:43,657][00394] Waiting for process rollout_proc2 to join... +[2023-02-27 12:41:43,660][00394] Waiting for process rollout_proc3 to join... +[2023-02-27 12:41:43,662][00394] Waiting for process rollout_proc4 to join... +[2023-02-27 12:41:43,663][00394] Waiting for process rollout_proc5 to join... +[2023-02-27 12:41:43,668][00394] Waiting for process rollout_proc6 to join... +[2023-02-27 12:41:43,669][00394] Waiting for process rollout_proc7 to join... +[2023-02-27 12:41:43,671][00394] Batcher 0 profile tree view: +batching: 26.0965, releasing_batches: 0.0298 +[2023-02-27 12:41:43,673][00394] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0026 + wait_policy_total: 542.2606 +update_model: 8.0453 + weight_update: 0.0020 +one_step: 0.0034 + handle_policy_step: 547.5090 + deserialize: 16.1034, stack: 3.1577, obs_to_device_normalize: 120.7245, forward: 263.6819, send_messages: 28.3321 + prepare_outputs: 87.5433 + to_cpu: 52.8791 +[2023-02-27 12:41:43,674][00394] Learner 0 profile tree view: +misc: 0.0072, prepare_batch: 18.2706 +train: 81.1486 + epoch_init: 0.0088, minibatch_init: 0.0078, losses_postprocess: 0.6623, kl_divergence: 0.6086, after_optimizer: 2.8761 + calculate_losses: 26.9739 + losses_init: 0.0162, forward_head: 1.9850, bptt_initial: 17.3610, tail: 1.2509, advantages_returns: 0.2622, losses: 3.3113 + bptt: 2.4481 + bptt_forward_core: 2.3531 + update: 49.1618 + clip: 1.5064 +[2023-02-27 12:41:43,676][00394] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3743, enqueue_policy_requests: 143.4421, env_step: 864.5080, overhead: 23.1713, complete_rollouts: 7.4822 +save_policy_outputs: 21.7239 + split_output_tensors: 10.6965 +[2023-02-27 12:41:43,677][00394] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3087, enqueue_policy_requests: 152.6712, env_step: 853.8602, overhead: 23.0619, complete_rollouts: 7.4721 +save_policy_outputs: 21.9218 + split_output_tensors: 10.8747 +[2023-02-27 12:41:43,680][00394] Loop Runner_EvtLoop terminating... +[2023-02-27 12:41:43,682][00394] Runner profile tree view: +main_loop: 1172.2192 +[2023-02-27 12:41:43,683][00394] Collected {0: 16007168}, FPS: 3413.9 +[2023-02-27 12:41:43,781][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 12:41:43,783][00394] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-27 12:41:43,784][00394] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-27 12:41:43,788][00394] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-27 12:41:43,790][00394] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:41:43,792][00394] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-27 12:41:43,794][00394] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:41:43,795][00394] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-27 12:41:43,796][00394] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-27 12:41:43,797][00394] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-27 12:41:43,798][00394] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-27 12:41:43,800][00394] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-27 12:41:43,801][00394] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-27 12:41:43,802][00394] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-27 12:41:43,803][00394] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-27 12:41:43,842][00394] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:41:43,851][00394] RunningMeanStd input shape: (1,) +[2023-02-27 12:41:43,882][00394] ConvEncoder: input_channels=3 +[2023-02-27 12:41:44,027][00394] Conv encoder output size: 512 +[2023-02-27 12:41:44,029][00394] Policy head output size: 512 +[2023-02-27 12:41:44,139][00394] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2023-02-27 12:41:45,198][00394] Num frames 100... +[2023-02-27 12:41:45,320][00394] Num frames 200... +[2023-02-27 12:41:45,446][00394] Num frames 300... +[2023-02-27 12:41:45,577][00394] Num frames 400... +[2023-02-27 12:41:45,765][00394] Num frames 500... +[2023-02-27 12:41:45,935][00394] Num frames 600... +[2023-02-27 12:41:46,101][00394] Num frames 700... +[2023-02-27 12:41:46,268][00394] Num frames 800... +[2023-02-27 12:41:46,437][00394] Num frames 900... +[2023-02-27 12:41:46,611][00394] Num frames 1000... +[2023-02-27 12:41:46,781][00394] Num frames 1100... +[2023-02-27 12:41:46,959][00394] Num frames 1200... +[2023-02-27 12:41:47,130][00394] Num frames 1300... +[2023-02-27 12:41:47,299][00394] Num frames 1400... +[2023-02-27 12:41:47,467][00394] Num frames 1500... +[2023-02-27 12:41:47,647][00394] Num frames 1600... +[2023-02-27 12:41:47,859][00394] Avg episode rewards: #0: 41.959, true rewards: #0: 16.960 +[2023-02-27 12:41:47,862][00394] Avg episode reward: 41.959, avg true_objective: 16.960 +[2023-02-27 12:41:47,875][00394] Num frames 1700... +[2023-02-27 12:41:48,044][00394] Num frames 1800... +[2023-02-27 12:41:48,212][00394] Num frames 1900... +[2023-02-27 12:41:48,392][00394] Num frames 2000... +[2023-02-27 12:41:48,570][00394] Num frames 2100... +[2023-02-27 12:41:48,750][00394] Num frames 2200... +[2023-02-27 12:41:48,923][00394] Num frames 2300... +[2023-02-27 12:41:49,107][00394] Num frames 2400... +[2023-02-27 12:41:49,277][00394] Num frames 2500... +[2023-02-27 12:41:49,430][00394] Num frames 2600... +[2023-02-27 12:41:49,554][00394] Num frames 2700... +[2023-02-27 12:41:49,634][00394] Avg episode rewards: #0: 33.600, true rewards: #0: 13.600 +[2023-02-27 12:41:49,636][00394] Avg episode reward: 33.600, avg true_objective: 13.600 +[2023-02-27 12:41:49,737][00394] Num frames 2800... +[2023-02-27 12:41:49,860][00394] Num frames 2900... +[2023-02-27 12:41:49,978][00394] Num frames 3000... +[2023-02-27 12:41:50,117][00394] Num frames 3100... +[2023-02-27 12:41:50,237][00394] Num frames 3200... +[2023-02-27 12:41:50,364][00394] Num frames 3300... +[2023-02-27 12:41:50,490][00394] Num frames 3400... +[2023-02-27 12:41:50,612][00394] Num frames 3500... +[2023-02-27 12:41:50,731][00394] Num frames 3600... +[2023-02-27 12:41:50,856][00394] Num frames 3700... +[2023-02-27 12:41:50,978][00394] Num frames 3800... +[2023-02-27 12:41:51,109][00394] Num frames 3900... +[2023-02-27 12:41:51,232][00394] Num frames 4000... +[2023-02-27 12:41:51,356][00394] Num frames 4100... +[2023-02-27 12:41:51,480][00394] Num frames 4200... +[2023-02-27 12:41:51,608][00394] Num frames 4300... +[2023-02-27 12:41:51,730][00394] Num frames 4400... +[2023-02-27 12:41:51,859][00394] Num frames 4500... +[2023-02-27 12:41:51,981][00394] Num frames 4600... +[2023-02-27 12:41:52,110][00394] Num frames 4700... +[2023-02-27 12:41:52,242][00394] Num frames 4800... +[2023-02-27 12:41:52,323][00394] Avg episode rewards: #0: 41.066, true rewards: #0: 16.067 +[2023-02-27 12:41:52,325][00394] Avg episode reward: 41.066, avg true_objective: 16.067 +[2023-02-27 12:41:52,425][00394] Num frames 4900... +[2023-02-27 12:41:52,549][00394] Num frames 5000... +[2023-02-27 12:41:52,674][00394] Num frames 5100... +[2023-02-27 12:41:52,791][00394] Num frames 5200... +[2023-02-27 12:41:52,910][00394] Num frames 5300... +[2023-02-27 12:41:53,031][00394] Num frames 5400... +[2023-02-27 12:41:53,167][00394] Num frames 5500... +[2023-02-27 12:41:53,287][00394] Num frames 5600... +[2023-02-27 12:41:53,418][00394] Num frames 5700... +[2023-02-27 12:41:53,547][00394] Num frames 5800... +[2023-02-27 12:41:53,672][00394] Num frames 5900... +[2023-02-27 12:41:53,801][00394] Num frames 6000... +[2023-02-27 12:41:53,926][00394] Num frames 6100... +[2023-02-27 12:41:54,056][00394] Num frames 6200... +[2023-02-27 12:41:54,232][00394] Avg episode rewards: #0: 41.480, true rewards: #0: 15.730 +[2023-02-27 12:41:54,233][00394] Avg episode reward: 41.480, avg true_objective: 15.730 +[2023-02-27 12:41:54,249][00394] Num frames 6300... +[2023-02-27 12:41:54,372][00394] Num frames 6400... +[2023-02-27 12:41:54,500][00394] Num frames 6500... +[2023-02-27 12:41:54,626][00394] Num frames 6600... +[2023-02-27 12:41:54,747][00394] Num frames 6700... +[2023-02-27 12:41:54,870][00394] Num frames 6800... +[2023-02-27 12:41:55,001][00394] Num frames 6900... +[2023-02-27 12:41:55,124][00394] Num frames 7000... +[2023-02-27 12:41:55,251][00394] Num frames 7100... +[2023-02-27 12:41:55,343][00394] Avg episode rewards: #0: 36.848, true rewards: #0: 14.248 +[2023-02-27 12:41:55,346][00394] Avg episode reward: 36.848, avg true_objective: 14.248 +[2023-02-27 12:41:55,441][00394] Num frames 7200... +[2023-02-27 12:41:55,574][00394] Num frames 7300... +[2023-02-27 12:41:55,693][00394] Num frames 7400... +[2023-02-27 12:41:55,817][00394] Num frames 7500... +[2023-02-27 12:41:55,935][00394] Num frames 7600... +[2023-02-27 12:41:56,065][00394] Num frames 7700... +[2023-02-27 12:41:56,189][00394] Num frames 7800... +[2023-02-27 12:41:56,315][00394] Num frames 7900... +[2023-02-27 12:41:56,435][00394] Num frames 8000... +[2023-02-27 12:41:56,564][00394] Num frames 8100... +[2023-02-27 12:41:56,696][00394] Num frames 8200... +[2023-02-27 12:41:56,823][00394] Num frames 8300... +[2023-02-27 12:41:56,907][00394] Avg episode rewards: #0: 35.701, true rewards: #0: 13.868 +[2023-02-27 12:41:56,909][00394] Avg episode reward: 35.701, avg true_objective: 13.868 +[2023-02-27 12:41:57,019][00394] Num frames 8400... +[2023-02-27 12:41:57,149][00394] Num frames 8500... +[2023-02-27 12:41:57,281][00394] Num frames 8600... +[2023-02-27 12:41:57,405][00394] Num frames 8700... +[2023-02-27 12:41:57,533][00394] Num frames 8800... +[2023-02-27 12:41:57,653][00394] Num frames 8900... +[2023-02-27 12:41:57,768][00394] Num frames 9000... +[2023-02-27 12:41:57,919][00394] Avg episode rewards: #0: 32.963, true rewards: #0: 12.963 +[2023-02-27 12:41:57,921][00394] Avg episode reward: 32.963, avg true_objective: 12.963 +[2023-02-27 12:41:57,954][00394] Num frames 9100... +[2023-02-27 12:41:58,081][00394] Num frames 9200... +[2023-02-27 12:41:58,203][00394] Num frames 9300... +[2023-02-27 12:41:58,332][00394] Num frames 9400... +[2023-02-27 12:41:58,453][00394] Num frames 9500... +[2023-02-27 12:41:58,580][00394] Num frames 9600... +[2023-02-27 12:41:58,709][00394] Num frames 9700... +[2023-02-27 12:41:58,831][00394] Num frames 9800... +[2023-02-27 12:41:58,953][00394] Num frames 9900... +[2023-02-27 12:41:59,076][00394] Num frames 10000... +[2023-02-27 12:41:59,204][00394] Num frames 10100... +[2023-02-27 12:41:59,335][00394] Num frames 10200... +[2023-02-27 12:41:59,500][00394] Num frames 10300... +[2023-02-27 12:41:59,673][00394] Num frames 10400... +[2023-02-27 12:41:59,842][00394] Num frames 10500... +[2023-02-27 12:42:00,010][00394] Num frames 10600... +[2023-02-27 12:42:00,169][00394] Num frames 10700... +[2023-02-27 12:42:00,334][00394] Num frames 10800... +[2023-02-27 12:42:00,497][00394] Num frames 10900... +[2023-02-27 12:42:00,727][00394] Avg episode rewards: #0: 35.492, true rewards: #0: 13.742 +[2023-02-27 12:42:00,734][00394] Avg episode reward: 35.492, avg true_objective: 13.742 +[2023-02-27 12:42:00,749][00394] Num frames 11000... +[2023-02-27 12:42:00,920][00394] Num frames 11100... +[2023-02-27 12:42:01,087][00394] Num frames 11200... +[2023-02-27 12:42:01,258][00394] Num frames 11300... +[2023-02-27 12:42:01,437][00394] Num frames 11400... +[2023-02-27 12:42:01,604][00394] Num frames 11500... +[2023-02-27 12:42:01,769][00394] Num frames 11600... +[2023-02-27 12:42:01,945][00394] Num frames 11700... +[2023-02-27 12:42:02,111][00394] Avg episode rewards: #0: 33.069, true rewards: #0: 13.069 +[2023-02-27 12:42:02,114][00394] Avg episode reward: 33.069, avg true_objective: 13.069 +[2023-02-27 12:42:02,180][00394] Num frames 11800... +[2023-02-27 12:42:02,356][00394] Num frames 11900... +[2023-02-27 12:42:02,542][00394] Num frames 12000... +[2023-02-27 12:42:02,725][00394] Num frames 12100... +[2023-02-27 12:42:02,895][00394] Num frames 12200... +[2023-02-27 12:42:03,072][00394] Num frames 12300... +[2023-02-27 12:42:03,226][00394] Num frames 12400... +[2023-02-27 12:42:03,367][00394] Num frames 12500... +[2023-02-27 12:42:03,498][00394] Num frames 12600... +[2023-02-27 12:42:03,636][00394] Num frames 12700... +[2023-02-27 12:42:03,754][00394] Num frames 12800... +[2023-02-27 12:42:03,882][00394] Num frames 12900... +[2023-02-27 12:42:04,006][00394] Num frames 13000... +[2023-02-27 12:42:04,131][00394] Num frames 13100... +[2023-02-27 12:42:04,258][00394] Num frames 13200... +[2023-02-27 12:42:04,398][00394] Num frames 13300... +[2023-02-27 12:42:04,524][00394] Num frames 13400... +[2023-02-27 12:42:04,659][00394] Num frames 13500... +[2023-02-27 12:42:04,783][00394] Num frames 13600... +[2023-02-27 12:42:04,913][00394] Num frames 13700... +[2023-02-27 12:42:05,055][00394] Avg episode rewards: #0: 35.572, true rewards: #0: 13.772 +[2023-02-27 12:42:05,057][00394] Avg episode reward: 35.572, avg true_objective: 13.772 +[2023-02-27 12:43:31,744][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-27 12:43:32,209][00394] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-27 12:43:32,212][00394] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-27 12:43:32,214][00394] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-27 12:43:32,216][00394] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-27 12:43:32,218][00394] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-27 12:43:32,220][00394] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-27 12:43:32,222][00394] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-27 12:43:32,223][00394] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-27 12:43:32,225][00394] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-27 12:43:32,226][00394] Adding new argument 'hf_repository'='Clawoo/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-27 12:43:32,227][00394] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-27 12:43:32,228][00394] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-27 12:43:32,230][00394] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-27 12:43:32,231][00394] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-27 12:43:32,232][00394] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-27 12:43:32,256][00394] RunningMeanStd input shape: (3, 72, 128) +[2023-02-27 12:43:32,259][00394] RunningMeanStd input shape: (1,) +[2023-02-27 12:43:32,280][00394] ConvEncoder: input_channels=3 +[2023-02-27 12:43:32,341][00394] Conv encoder output size: 512 +[2023-02-27 12:43:32,343][00394] Policy head output size: 512 +[2023-02-27 12:43:32,372][00394] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2023-02-27 12:43:33,070][00394] Num frames 100... +[2023-02-27 12:43:33,186][00394] Num frames 200... +[2023-02-27 12:43:33,315][00394] Num frames 300... +[2023-02-27 12:43:33,432][00394] Num frames 400... +[2023-02-27 12:43:33,547][00394] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480 +[2023-02-27 12:43:33,551][00394] Avg episode reward: 5.480, avg true_objective: 4.480 +[2023-02-27 12:43:33,666][00394] Num frames 500... +[2023-02-27 12:43:33,822][00394] Num frames 600... +[2023-02-27 12:43:33,954][00394] Num frames 700... +[2023-02-27 12:43:34,098][00394] Num frames 800... +[2023-02-27 12:43:34,215][00394] Num frames 900... +[2023-02-27 12:43:34,339][00394] Num frames 1000... +[2023-02-27 12:43:34,453][00394] Num frames 1100... +[2023-02-27 12:43:34,574][00394] Num frames 1200... +[2023-02-27 12:43:34,694][00394] Num frames 1300... +[2023-02-27 12:43:34,801][00394] Avg episode rewards: #0: 11.220, true rewards: #0: 6.720 +[2023-02-27 12:43:34,805][00394] Avg episode reward: 11.220, avg true_objective: 6.720 +[2023-02-27 12:43:34,870][00394] Num frames 1400... +[2023-02-27 12:43:34,989][00394] Num frames 1500... +[2023-02-27 12:43:35,119][00394] Num frames 1600... +[2023-02-27 12:43:35,258][00394] Num frames 1700... +[2023-02-27 12:43:35,377][00394] Num frames 1800... +[2023-02-27 12:43:35,514][00394] Num frames 1900... +[2023-02-27 12:43:35,649][00394] Num frames 2000... +[2023-02-27 12:43:35,769][00394] Num frames 2100... +[2023-02-27 12:43:35,895][00394] Num frames 2200... +[2023-02-27 12:43:36,016][00394] Num frames 2300... +[2023-02-27 12:43:36,140][00394] Num frames 2400... +[2023-02-27 12:43:36,259][00394] Num frames 2500... +[2023-02-27 12:43:36,384][00394] Num frames 2600... +[2023-02-27 12:43:36,504][00394] Num frames 2700... +[2023-02-27 12:43:36,626][00394] Num frames 2800... +[2023-02-27 12:43:36,751][00394] Num frames 2900... +[2023-02-27 12:43:36,881][00394] Num frames 3000... +[2023-02-27 12:43:36,999][00394] Num frames 3100... +[2023-02-27 12:43:37,121][00394] Num frames 3200... +[2023-02-27 12:43:37,245][00394] Num frames 3300... +[2023-02-27 12:43:37,365][00394] Num frames 3400... +[2023-02-27 12:43:37,474][00394] Avg episode rewards: #0: 26.146, true rewards: #0: 11.480 +[2023-02-27 12:43:37,476][00394] Avg episode reward: 26.146, avg true_objective: 11.480 +[2023-02-27 12:43:37,549][00394] Num frames 3500... +[2023-02-27 12:43:37,666][00394] Num frames 3600... +[2023-02-27 12:43:37,787][00394] Num frames 3700... +[2023-02-27 12:43:37,915][00394] Num frames 3800... +[2023-02-27 12:43:38,037][00394] Num frames 3900... +[2023-02-27 12:43:38,200][00394] Avg episode rewards: #0: 22.220, true rewards: #0: 9.970 +[2023-02-27 12:43:38,203][00394] Avg episode reward: 22.220, avg true_objective: 9.970 +[2023-02-27 12:43:38,222][00394] Num frames 4000... +[2023-02-27 12:43:38,337][00394] Num frames 4100... +[2023-02-27 12:43:38,464][00394] Num frames 4200... +[2023-02-27 12:43:38,585][00394] Num frames 4300... +[2023-02-27 12:43:38,709][00394] Num frames 4400... +[2023-02-27 12:43:38,771][00394] Avg episode rewards: #0: 18.808, true rewards: #0: 8.808 +[2023-02-27 12:43:38,773][00394] Avg episode reward: 18.808, avg true_objective: 8.808 +[2023-02-27 12:43:38,896][00394] Num frames 4500... +[2023-02-27 12:43:39,021][00394] Num frames 4600... +[2023-02-27 12:43:39,146][00394] Num frames 4700... +[2023-02-27 12:43:39,267][00394] Num frames 4800... +[2023-02-27 12:43:39,386][00394] Num frames 4900... +[2023-02-27 12:43:39,557][00394] Num frames 5000... +[2023-02-27 12:43:39,726][00394] Num frames 5100... +[2023-02-27 12:43:39,890][00394] Num frames 5200... +[2023-02-27 12:43:40,056][00394] Num frames 5300... +[2023-02-27 12:43:40,218][00394] Num frames 5400... +[2023-02-27 12:43:40,389][00394] Num frames 5500... +[2023-02-27 12:43:40,556][00394] Num frames 5600... +[2023-02-27 12:43:40,736][00394] Num frames 5700... +[2023-02-27 12:43:40,908][00394] Num frames 5800... +[2023-02-27 12:43:41,077][00394] Num frames 5900... +[2023-02-27 12:43:41,257][00394] Num frames 6000... +[2023-02-27 12:43:41,444][00394] Num frames 6100... +[2023-02-27 12:43:41,618][00394] Num frames 6200... +[2023-02-27 12:43:41,793][00394] Num frames 6300... +[2023-02-27 12:43:41,980][00394] Num frames 6400... +[2023-02-27 12:43:42,161][00394] Num frames 6500... +[2023-02-27 12:43:42,226][00394] Avg episode rewards: #0: 26.006, true rewards: #0: 10.840 +[2023-02-27 12:43:42,229][00394] Avg episode reward: 26.006, avg true_objective: 10.840 +[2023-02-27 12:43:42,398][00394] Num frames 6600... +[2023-02-27 12:43:42,531][00394] Num frames 6700... +[2023-02-27 12:43:42,652][00394] Num frames 6800... +[2023-02-27 12:43:42,770][00394] Num frames 6900... +[2023-02-27 12:43:42,892][00394] Num frames 7000... +[2023-02-27 12:43:43,013][00394] Num frames 7100... +[2023-02-27 12:43:43,084][00394] Avg episode rewards: #0: 23.588, true rewards: #0: 10.160 +[2023-02-27 12:43:43,086][00394] Avg episode reward: 23.588, avg true_objective: 10.160 +[2023-02-27 12:43:43,210][00394] Num frames 7200... +[2023-02-27 12:43:43,330][00394] Num frames 7300... +[2023-02-27 12:43:43,454][00394] Num frames 7400... +[2023-02-27 12:43:43,573][00394] Num frames 7500... +[2023-02-27 12:43:43,691][00394] Num frames 7600... +[2023-02-27 12:43:43,813][00394] Num frames 7700... +[2023-02-27 12:43:43,933][00394] Num frames 7800... +[2023-02-27 12:43:44,053][00394] Num frames 7900... +[2023-02-27 12:43:44,178][00394] Num frames 8000... +[2023-02-27 12:43:44,302][00394] Num frames 8100... +[2023-02-27 12:43:44,433][00394] Num frames 8200... +[2023-02-27 12:43:44,550][00394] Num frames 8300... +[2023-02-27 12:43:44,676][00394] Num frames 8400... +[2023-02-27 12:43:44,800][00394] Num frames 8500... +[2023-02-27 12:43:44,927][00394] Num frames 8600... +[2023-02-27 12:43:45,055][00394] Num frames 8700... +[2023-02-27 12:43:45,179][00394] Num frames 8800... +[2023-02-27 12:43:45,305][00394] Num frames 8900... +[2023-02-27 12:43:45,431][00394] Num frames 9000... +[2023-02-27 12:43:45,549][00394] Num frames 9100... +[2023-02-27 12:43:45,683][00394] Num frames 9200... +[2023-02-27 12:43:45,755][00394] Avg episode rewards: #0: 27.765, true rewards: #0: 11.515 +[2023-02-27 12:43:45,756][00394] Avg episode reward: 27.765, avg true_objective: 11.515 +[2023-02-27 12:43:45,869][00394] Num frames 9300... +[2023-02-27 12:43:45,985][00394] Num frames 9400... +[2023-02-27 12:43:46,102][00394] Num frames 9500... +[2023-02-27 12:43:46,155][00394] Avg episode rewards: #0: 25.111, true rewards: #0: 10.556 +[2023-02-27 12:43:46,158][00394] Avg episode reward: 25.111, avg true_objective: 10.556 +[2023-02-27 12:43:46,287][00394] Num frames 9600... +[2023-02-27 12:43:46,422][00394] Num frames 9700... +[2023-02-27 12:43:46,543][00394] Num frames 9800... +[2023-02-27 12:43:46,672][00394] Num frames 9900... +[2023-02-27 12:43:46,799][00394] Num frames 10000... +[2023-02-27 12:43:46,908][00394] Avg episode rewards: #0: 23.544, true rewards: #0: 10.044 +[2023-02-27 12:43:46,909][00394] Avg episode reward: 23.544, avg true_objective: 10.044 +[2023-02-27 12:44:46,898][00394] Replay video saved to /content/train_dir/default_experiment/replay.mp4!