diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1170,3 +1170,1546 @@ main_loop: 1195.0150 [2023-02-26 07:14:35,913][06480] Avg episode rewards: #0: 23.750, true rewards: #0: 10.550 [2023-02-26 07:14:35,919][06480] Avg episode reward: 23.750, avg true_objective: 10.550 [2023-02-26 07:15:38,696][06480] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-26 07:15:42,696][06480] The model has been pushed to https://huggingface.co/sd99/rl_course_vizdoom_health_gathering_supreme +[2023-02-26 07:17:10,353][06480] Environment doom_basic already registered, overwriting... +[2023-02-26 07:17:10,356][06480] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-26 07:17:10,357][06480] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-26 07:17:10,363][06480] Environment doom_dm already registered, overwriting... +[2023-02-26 07:17:10,364][06480] Environment doom_dwango5 already registered, overwriting... +[2023-02-26 07:17:10,366][06480] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-26 07:17:10,367][06480] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-26 07:17:10,368][06480] Environment doom_my_way_home already registered, overwriting... +[2023-02-26 07:17:10,373][06480] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-26 07:17:10,374][06480] Environment doom_defend_the_center already registered, overwriting... +[2023-02-26 07:17:10,375][06480] Environment doom_defend_the_line already registered, overwriting... +[2023-02-26 07:17:10,376][06480] Environment doom_health_gathering already registered, overwriting... +[2023-02-26 07:17:10,377][06480] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-26 07:17:10,391][06480] Environment doom_battle already registered, overwriting... +[2023-02-26 07:17:10,393][06480] Environment doom_battle2 already registered, overwriting... +[2023-02-26 07:17:10,394][06480] Environment doom_duel_bots already registered, overwriting... +[2023-02-26 07:17:10,397][06480] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-26 07:17:10,398][06480] Environment doom_duel already registered, overwriting... +[2023-02-26 07:17:10,402][06480] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-26 07:17:10,403][06480] Environment doom_benchmark already registered, overwriting... +[2023-02-26 07:17:10,404][06480] register_encoder_factory: +[2023-02-26 07:17:10,427][06480] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-26 07:17:10,429][06480] Overriding arg 'train_for_env_steps' with value 10000000 passed from command line +[2023-02-26 07:17:10,437][06480] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-26 07:17:10,439][06480] Resuming existing experiment from /content/train_dir/default_experiment... +[2023-02-26 07:17:10,442][06480] Weights and Biases integration disabled +[2023-02-26 07:17:10,446][06480] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2023-02-26 07:17:11,981][06480] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=10000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2023-02-26 07:17:11,986][06480] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-26 07:17:11,991][06480] Rollout worker 0 uses device cpu +[2023-02-26 07:17:11,992][06480] Rollout worker 1 uses device cpu +[2023-02-26 07:17:11,994][06480] Rollout worker 2 uses device cpu +[2023-02-26 07:17:11,996][06480] Rollout worker 3 uses device cpu +[2023-02-26 07:17:11,997][06480] Rollout worker 4 uses device cpu +[2023-02-26 07:17:11,998][06480] Rollout worker 5 uses device cpu +[2023-02-26 07:17:12,000][06480] Rollout worker 6 uses device cpu +[2023-02-26 07:17:12,001][06480] Rollout worker 7 uses device cpu +[2023-02-26 07:17:12,115][06480] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-26 07:17:12,117][06480] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-26 07:17:12,148][06480] Starting all processes... +[2023-02-26 07:17:12,149][06480] Starting process learner_proc0 +[2023-02-26 07:17:12,288][06480] Starting all processes... +[2023-02-26 07:17:12,301][06480] Starting process inference_proc0-0 +[2023-02-26 07:17:12,301][06480] Starting process rollout_proc0 +[2023-02-26 07:17:12,306][06480] Starting process rollout_proc1 +[2023-02-26 07:17:12,306][06480] Starting process rollout_proc2 +[2023-02-26 07:17:12,306][06480] Starting process rollout_proc3 +[2023-02-26 07:17:12,307][06480] Starting process rollout_proc4 +[2023-02-26 07:17:12,307][06480] Starting process rollout_proc5 +[2023-02-26 07:17:12,307][06480] Starting process rollout_proc6 +[2023-02-26 07:17:12,307][06480] Starting process rollout_proc7 +[2023-02-26 07:17:22,066][30933] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-26 07:17:22,066][30933] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-26 07:17:22,120][30933] Num visible devices: 1 +[2023-02-26 07:17:22,176][30933] Starting seed is not provided +[2023-02-26 07:17:22,177][30933] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-26 07:17:22,178][30933] Initializing actor-critic model on device cuda:0 +[2023-02-26 07:17:22,179][30933] RunningMeanStd input shape: (3, 72, 128) +[2023-02-26 07:17:22,180][30933] RunningMeanStd input shape: (1,) +[2023-02-26 07:17:22,233][30933] ConvEncoder: input_channels=3 +[2023-02-26 07:17:23,022][30947] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-26 07:17:23,026][30947] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-26 07:17:23,104][30947] Num visible devices: 1 +[2023-02-26 07:17:23,283][30933] Conv encoder output size: 512 +[2023-02-26 07:17:23,290][30933] Policy head output size: 512 +[2023-02-26 07:17:23,411][30933] Created Actor Critic model with architecture: +[2023-02-26 07:17:23,411][30933] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-26 07:17:23,775][30948] Worker 0 uses CPU cores [0] +[2023-02-26 07:17:24,015][30951] Worker 2 uses CPU cores [0] +[2023-02-26 07:17:24,232][30958] Worker 3 uses CPU cores [1] +[2023-02-26 07:17:24,265][30952] Worker 1 uses CPU cores [1] +[2023-02-26 07:17:24,597][30960] Worker 5 uses CPU cores [1] +[2023-02-26 07:17:24,687][30968] Worker 7 uses CPU cores [1] +[2023-02-26 07:17:24,701][30962] Worker 4 uses CPU cores [0] +[2023-02-26 07:17:24,810][30970] Worker 6 uses CPU cores [0] +[2023-02-26 07:17:26,686][30933] Using optimizer +[2023-02-26 07:17:26,687][30933] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-26 07:17:26,715][30933] Loading model from checkpoint +[2023-02-26 07:17:26,719][30933] Loaded experiment state at self.train_step=978, self.env_steps=4005888 +[2023-02-26 07:17:26,720][30933] Initialized policy 0 weights for model version 978 +[2023-02-26 07:17:26,723][30933] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-26 07:17:26,730][30933] LearnerWorker_p0 finished initialization! +[2023-02-26 07:17:26,934][30947] RunningMeanStd input shape: (3, 72, 128) +[2023-02-26 07:17:26,935][30947] RunningMeanStd input shape: (1,) +[2023-02-26 07:17:26,947][30947] ConvEncoder: input_channels=3 +[2023-02-26 07:17:27,045][30947] Conv encoder output size: 512 +[2023-02-26 07:17:27,046][30947] Policy head output size: 512 +[2023-02-26 07:17:29,984][06480] Inference worker 0-0 is ready! +[2023-02-26 07:17:29,987][06480] All inference workers are ready! Signal rollout workers to start! +[2023-02-26 07:17:30,132][30951] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,136][30948] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,149][30970] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,166][30962] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,187][30958] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,226][30968] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,220][30960] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,223][30952] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-26 07:17:30,447][06480] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4005888. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-26 07:17:31,436][30951] Decorrelating experience for 0 frames... +[2023-02-26 07:17:31,458][30962] Decorrelating experience for 0 frames... +[2023-02-26 07:17:31,799][30958] Decorrelating experience for 0 frames... +[2023-02-26 07:17:31,802][30968] Decorrelating experience for 0 frames... +[2023-02-26 07:17:31,807][30960] Decorrelating experience for 0 frames... +[2023-02-26 07:17:32,107][06480] Heartbeat connected on Batcher_0 +[2023-02-26 07:17:32,112][06480] Heartbeat connected on LearnerWorker_p0 +[2023-02-26 07:17:32,157][06480] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-26 07:17:32,295][30951] Decorrelating experience for 32 frames... +[2023-02-26 07:17:33,036][30952] Decorrelating experience for 0 frames... +[2023-02-26 07:17:33,068][30948] Decorrelating experience for 0 frames... +[2023-02-26 07:17:33,379][30958] Decorrelating experience for 32 frames... +[2023-02-26 07:17:33,385][30960] Decorrelating experience for 32 frames... +[2023-02-26 07:17:33,383][30968] Decorrelating experience for 32 frames... +[2023-02-26 07:17:33,949][30951] Decorrelating experience for 64 frames... +[2023-02-26 07:17:34,330][30970] Decorrelating experience for 0 frames... +[2023-02-26 07:17:34,395][30952] Decorrelating experience for 32 frames... +[2023-02-26 07:17:34,639][30948] Decorrelating experience for 32 frames... +[2023-02-26 07:17:34,879][30958] Decorrelating experience for 64 frames... +[2023-02-26 07:17:34,887][30960] Decorrelating experience for 64 frames... +[2023-02-26 07:17:35,301][30951] Decorrelating experience for 96 frames... +[2023-02-26 07:17:35,446][06480] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-26 07:17:35,525][06480] Heartbeat connected on RolloutWorker_w2 +[2023-02-26 07:17:35,700][30952] Decorrelating experience for 64 frames... +[2023-02-26 07:17:35,904][30970] Decorrelating experience for 32 frames... +[2023-02-26 07:17:35,944][30962] Decorrelating experience for 32 frames... +[2023-02-26 07:17:36,047][30968] Decorrelating experience for 64 frames... +[2023-02-26 07:17:36,114][30960] Decorrelating experience for 96 frames... +[2023-02-26 07:17:36,390][06480] Heartbeat connected on RolloutWorker_w5 +[2023-02-26 07:17:36,464][30948] Decorrelating experience for 64 frames... +[2023-02-26 07:17:37,306][30970] Decorrelating experience for 64 frames... +[2023-02-26 07:17:37,434][30958] Decorrelating experience for 96 frames... +[2023-02-26 07:17:37,668][06480] Heartbeat connected on RolloutWorker_w3 +[2023-02-26 07:17:37,732][30948] Decorrelating experience for 96 frames... +[2023-02-26 07:17:38,234][06480] Heartbeat connected on RolloutWorker_w0 +[2023-02-26 07:17:39,128][30968] Decorrelating experience for 96 frames... +[2023-02-26 07:17:39,624][06480] Heartbeat connected on RolloutWorker_w7 +[2023-02-26 07:17:40,168][30962] Decorrelating experience for 64 frames... +[2023-02-26 07:17:40,446][06480] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 117.8. Samples: 1178. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-26 07:17:40,448][06480] Avg episode reward: [(0, '2.110')] +[2023-02-26 07:17:41,095][30970] Decorrelating experience for 96 frames... +[2023-02-26 07:17:41,481][30933] Signal inference workers to stop experience collection... +[2023-02-26 07:17:41,500][30947] InferenceWorker_p0-w0: stopping experience collection +[2023-02-26 07:17:41,603][30952] Decorrelating experience for 96 frames... +[2023-02-26 07:17:41,635][06480] Heartbeat connected on RolloutWorker_w6 +[2023-02-26 07:17:41,856][06480] Heartbeat connected on RolloutWorker_w1 +[2023-02-26 07:17:41,957][30962] Decorrelating experience for 96 frames... +[2023-02-26 07:17:42,014][06480] Heartbeat connected on RolloutWorker_w4 +[2023-02-26 07:17:44,109][30933] Signal inference workers to resume experience collection... +[2023-02-26 07:17:44,109][30947] InferenceWorker_p0-w0: resuming experience collection +[2023-02-26 07:17:45,446][06480] Fps is (10 sec: 409.6, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 4009984. Throughput: 0: 180.5. Samples: 2708. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-02-26 07:17:45,449][06480] Avg episode reward: [(0, '4.592')] +[2023-02-26 07:17:50,449][06480] Fps is (10 sec: 2047.4, 60 sec: 1023.9, 300 sec: 1023.9). Total num frames: 4026368. Throughput: 0: 211.9. Samples: 4238. Policy #0 lag: (min: 0.0, avg: 0.4, max: 3.0) +[2023-02-26 07:17:50,452][06480] Avg episode reward: [(0, '8.576')] +[2023-02-26 07:17:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 1474.6, 300 sec: 1474.6). Total num frames: 4042752. Throughput: 0: 342.3. Samples: 8556. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:17:55,453][06480] Avg episode reward: [(0, '12.684')] +[2023-02-26 07:17:56,086][30947] Updated weights for policy 0, policy_version 988 (0.0012) +[2023-02-26 07:18:00,446][06480] Fps is (10 sec: 3687.4, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 4063232. Throughput: 0: 510.3. Samples: 15310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:18:00,452][06480] Avg episode reward: [(0, '15.241')] +[2023-02-26 07:18:05,446][06480] Fps is (10 sec: 4095.9, 60 sec: 2223.6, 300 sec: 2223.6). Total num frames: 4083712. Throughput: 0: 536.2. Samples: 18768. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:18:05,456][06480] Avg episode reward: [(0, '18.738')] +[2023-02-26 07:18:05,544][30947] Updated weights for policy 0, policy_version 998 (0.0014) +[2023-02-26 07:18:10,446][06480] Fps is (10 sec: 3686.3, 60 sec: 2355.2, 300 sec: 2355.2). Total num frames: 4100096. Throughput: 0: 586.5. Samples: 23460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:18:10,453][06480] Avg episode reward: [(0, '20.240')] +[2023-02-26 07:18:15,446][06480] Fps is (10 sec: 3276.8, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 4116480. Throughput: 0: 628.0. Samples: 28260. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:18:15,454][06480] Avg episode reward: [(0, '20.983')] +[2023-02-26 07:18:17,818][30947] Updated weights for policy 0, policy_version 1008 (0.0011) +[2023-02-26 07:18:20,446][06480] Fps is (10 sec: 4096.1, 60 sec: 2703.4, 300 sec: 2703.4). Total num frames: 4141056. Throughput: 0: 702.8. Samples: 31624. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:18:20,454][06480] Avg episode reward: [(0, '22.618')] +[2023-02-26 07:18:25,452][06480] Fps is (10 sec: 4503.1, 60 sec: 2829.7, 300 sec: 2829.7). Total num frames: 4161536. Throughput: 0: 827.4. Samples: 38414. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:18:25,460][06480] Avg episode reward: [(0, '24.076')] +[2023-02-26 07:18:28,204][30947] Updated weights for policy 0, policy_version 1018 (0.0022) +[2023-02-26 07:18:30,450][06480] Fps is (10 sec: 3275.6, 60 sec: 2798.8, 300 sec: 2798.8). Total num frames: 4173824. Throughput: 0: 887.3. Samples: 42642. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:18:30,452][06480] Avg episode reward: [(0, '24.339')] +[2023-02-26 07:18:35,446][06480] Fps is (10 sec: 2868.8, 60 sec: 3072.0, 300 sec: 2835.7). Total num frames: 4190208. Throughput: 0: 903.6. Samples: 44898. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:18:35,454][06480] Avg episode reward: [(0, '22.478')] +[2023-02-26 07:18:40,446][06480] Fps is (10 sec: 2868.3, 60 sec: 3276.8, 300 sec: 2808.7). Total num frames: 4202496. Throughput: 0: 908.8. Samples: 49452. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:18:40,452][06480] Avg episode reward: [(0, '23.040')] +[2023-02-26 07:18:42,243][30947] Updated weights for policy 0, policy_version 1028 (0.0020) +[2023-02-26 07:18:45,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 2839.9). Total num frames: 4218880. Throughput: 0: 856.7. Samples: 53860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:18:45,449][06480] Avg episode reward: [(0, '23.201')] +[2023-02-26 07:18:50,449][06480] Fps is (10 sec: 2866.5, 60 sec: 3413.4, 300 sec: 2815.9). Total num frames: 4231168. Throughput: 0: 827.2. Samples: 55996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:18:50,455][06480] Avg episode reward: [(0, '24.211')] +[2023-02-26 07:18:55,328][30947] Updated weights for policy 0, policy_version 1038 (0.0022) +[2023-02-26 07:18:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 2891.3). Total num frames: 4251648. Throughput: 0: 825.0. Samples: 60584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:18:55,449][06480] Avg episode reward: [(0, '24.330')] +[2023-02-26 07:19:00,446][06480] Fps is (10 sec: 4096.9, 60 sec: 3481.6, 300 sec: 2958.2). Total num frames: 4272128. Throughput: 0: 866.8. Samples: 67264. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:00,453][06480] Avg episode reward: [(0, '24.813')] +[2023-02-26 07:19:04,945][30947] Updated weights for policy 0, policy_version 1048 (0.0014) +[2023-02-26 07:19:05,447][06480] Fps is (10 sec: 4095.7, 60 sec: 3481.6, 300 sec: 3018.1). Total num frames: 4292608. Throughput: 0: 868.4. Samples: 70704. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:19:05,453][06480] Avg episode reward: [(0, '26.808')] +[2023-02-26 07:19:05,457][30933] Saving new best policy, reward=26.808! +[2023-02-26 07:19:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 2990.1). Total num frames: 4304896. Throughput: 0: 819.3. Samples: 75280. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:19:10,448][06480] Avg episode reward: [(0, '26.542')] +[2023-02-26 07:19:10,469][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001051_4304896.pth... +[2023-02-26 07:19:10,692][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000880_3604480.pth +[2023-02-26 07:19:15,446][06480] Fps is (10 sec: 3277.1, 60 sec: 3481.6, 300 sec: 3042.8). Total num frames: 4325376. Throughput: 0: 833.7. Samples: 80156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:15,452][06480] Avg episode reward: [(0, '25.196')] +[2023-02-26 07:19:17,099][30947] Updated weights for policy 0, policy_version 1058 (0.0021) +[2023-02-26 07:19:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3090.6). Total num frames: 4345856. Throughput: 0: 857.7. Samples: 83494. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:20,452][06480] Avg episode reward: [(0, '25.079')] +[2023-02-26 07:19:25,446][06480] Fps is (10 sec: 4095.9, 60 sec: 3413.6, 300 sec: 3134.3). Total num frames: 4366336. Throughput: 0: 902.8. Samples: 90080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:19:25,449][06480] Avg episode reward: [(0, '23.154')] +[2023-02-26 07:19:27,739][30947] Updated weights for policy 0, policy_version 1068 (0.0019) +[2023-02-26 07:19:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.5, 300 sec: 3106.2). Total num frames: 4378624. Throughput: 0: 897.3. Samples: 94238. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:30,452][06480] Avg episode reward: [(0, '23.155')] +[2023-02-26 07:19:35,446][06480] Fps is (10 sec: 3276.9, 60 sec: 3481.6, 300 sec: 3145.8). Total num frames: 4399104. Throughput: 0: 897.4. Samples: 96376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:35,452][06480] Avg episode reward: [(0, '22.533')] +[2023-02-26 07:19:38,761][30947] Updated weights for policy 0, policy_version 1078 (0.0014) +[2023-02-26 07:19:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3182.3). Total num frames: 4419584. Throughput: 0: 944.4. Samples: 103082. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:19:40,453][06480] Avg episode reward: [(0, '22.927')] +[2023-02-26 07:19:45,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3216.1). Total num frames: 4440064. Throughput: 0: 930.8. Samples: 109150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:45,449][06480] Avg episode reward: [(0, '23.907')] +[2023-02-26 07:19:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3189.0). Total num frames: 4452352. Throughput: 0: 901.7. Samples: 111278. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:19:50,454][06480] Avg episode reward: [(0, '23.716')] +[2023-02-26 07:19:50,563][30947] Updated weights for policy 0, policy_version 1088 (0.0022) +[2023-02-26 07:19:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3220.3). Total num frames: 4472832. Throughput: 0: 905.4. Samples: 116024. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:19:55,454][06480] Avg episode reward: [(0, '24.564')] +[2023-02-26 07:20:00,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3249.5). Total num frames: 4493312. Throughput: 0: 938.3. Samples: 122378. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:20:00,454][06480] Avg episode reward: [(0, '23.580')] +[2023-02-26 07:20:00,999][30947] Updated weights for policy 0, policy_version 1098 (0.0017) +[2023-02-26 07:20:05,450][06480] Fps is (10 sec: 3685.1, 60 sec: 3618.0, 300 sec: 3250.3). Total num frames: 4509696. Throughput: 0: 932.9. Samples: 125480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:20:05,457][06480] Avg episode reward: [(0, '23.591')] +[2023-02-26 07:20:10,448][06480] Fps is (10 sec: 2866.8, 60 sec: 3618.0, 300 sec: 3225.6). Total num frames: 4521984. Throughput: 0: 878.1. Samples: 129596. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:20:10,453][06480] Avg episode reward: [(0, '23.469')] +[2023-02-26 07:20:14,019][30947] Updated weights for policy 0, policy_version 1108 (0.0024) +[2023-02-26 07:20:15,446][06480] Fps is (10 sec: 3278.0, 60 sec: 3618.1, 300 sec: 3252.0). Total num frames: 4542464. Throughput: 0: 899.6. Samples: 134718. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:20:15,454][06480] Avg episode reward: [(0, '22.784')] +[2023-02-26 07:20:20,447][06480] Fps is (10 sec: 4096.5, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 4562944. Throughput: 0: 924.3. Samples: 137972. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:20:20,453][06480] Avg episode reward: [(0, '22.696')] +[2023-02-26 07:20:24,118][30947] Updated weights for policy 0, policy_version 1118 (0.0016) +[2023-02-26 07:20:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 4579328. Throughput: 0: 906.3. Samples: 143866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:20:25,453][06480] Avg episode reward: [(0, '21.762')] +[2023-02-26 07:20:30,447][06480] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 4595712. Throughput: 0: 859.9. Samples: 147848. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:20:30,454][06480] Avg episode reward: [(0, '22.195')] +[2023-02-26 07:20:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 4612096. Throughput: 0: 858.7. Samples: 149918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:20:35,454][06480] Avg episode reward: [(0, '22.617')] +[2023-02-26 07:20:37,073][30947] Updated weights for policy 0, policy_version 1128 (0.0033) +[2023-02-26 07:20:40,446][06480] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3298.4). Total num frames: 4632576. Throughput: 0: 894.0. Samples: 156256. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:20:40,449][06480] Avg episode reward: [(0, '22.455')] +[2023-02-26 07:20:45,449][06480] Fps is (10 sec: 4094.9, 60 sec: 3549.7, 300 sec: 3318.8). Total num frames: 4653056. Throughput: 0: 878.9. Samples: 161930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:20:45,452][06480] Avg episode reward: [(0, '22.442')] +[2023-02-26 07:20:48,695][30947] Updated weights for policy 0, policy_version 1138 (0.0032) +[2023-02-26 07:20:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3297.3). Total num frames: 4665344. Throughput: 0: 853.7. Samples: 163892. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:20:50,455][06480] Avg episode reward: [(0, '23.387')] +[2023-02-26 07:20:55,446][06480] Fps is (10 sec: 2868.0, 60 sec: 3481.6, 300 sec: 3296.8). Total num frames: 4681728. Throughput: 0: 862.0. Samples: 168384. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:20:55,452][06480] Avg episode reward: [(0, '24.021')] +[2023-02-26 07:20:59,381][30947] Updated weights for policy 0, policy_version 1148 (0.0023) +[2023-02-26 07:21:00,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3335.3). Total num frames: 4706304. Throughput: 0: 900.8. Samples: 175254. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:21:00,449][06480] Avg episode reward: [(0, '23.355')] +[2023-02-26 07:21:05,448][06480] Fps is (10 sec: 4504.9, 60 sec: 3618.3, 300 sec: 3353.0). Total num frames: 4726784. Throughput: 0: 904.9. Samples: 178692. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:21:05,451][06480] Avg episode reward: [(0, '23.583')] +[2023-02-26 07:21:10,446][06480] Fps is (10 sec: 3276.7, 60 sec: 3618.2, 300 sec: 3332.7). Total num frames: 4739072. Throughput: 0: 877.9. Samples: 183370. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:21:10,453][06480] Avg episode reward: [(0, '23.515')] +[2023-02-26 07:21:10,464][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001157_4739072.pth... +[2023-02-26 07:21:10,671][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth +[2023-02-26 07:21:11,217][30947] Updated weights for policy 0, policy_version 1158 (0.0015) +[2023-02-26 07:21:15,449][06480] Fps is (10 sec: 2457.3, 60 sec: 3481.4, 300 sec: 3313.2). Total num frames: 4751360. Throughput: 0: 875.0. Samples: 187226. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:21:15,454][06480] Avg episode reward: [(0, '23.771')] +[2023-02-26 07:21:20,446][06480] Fps is (10 sec: 2457.7, 60 sec: 3345.1, 300 sec: 3294.6). Total num frames: 4763648. Throughput: 0: 876.2. Samples: 189346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:21:20,450][06480] Avg episode reward: [(0, '23.132')] +[2023-02-26 07:21:25,230][30947] Updated weights for policy 0, policy_version 1168 (0.0022) +[2023-02-26 07:21:25,446][06480] Fps is (10 sec: 3277.7, 60 sec: 3413.3, 300 sec: 3311.7). Total num frames: 4784128. Throughput: 0: 845.2. Samples: 194292. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:21:25,455][06480] Avg episode reward: [(0, '22.853')] +[2023-02-26 07:21:30,447][06480] Fps is (10 sec: 3276.7, 60 sec: 3345.1, 300 sec: 3293.9). Total num frames: 4796416. Throughput: 0: 815.0. Samples: 198604. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:21:30,449][06480] Avg episode reward: [(0, '21.906')] +[2023-02-26 07:21:35,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3293.5). Total num frames: 4812800. Throughput: 0: 820.4. Samples: 200812. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:21:35,453][06480] Avg episode reward: [(0, '23.725')] +[2023-02-26 07:21:37,338][30947] Updated weights for policy 0, policy_version 1178 (0.0019) +[2023-02-26 07:21:40,446][06480] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3326.0). Total num frames: 4837376. Throughput: 0: 863.2. Samples: 207228. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:21:40,452][06480] Avg episode reward: [(0, '25.590')] +[2023-02-26 07:21:45,454][06480] Fps is (10 sec: 4502.1, 60 sec: 3413.0, 300 sec: 3341.0). Total num frames: 4857856. Throughput: 0: 854.1. Samples: 213696. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:21:45,456][06480] Avg episode reward: [(0, '24.209')] +[2023-02-26 07:21:47,767][30947] Updated weights for policy 0, policy_version 1188 (0.0012) +[2023-02-26 07:21:50,447][06480] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3324.1). Total num frames: 4870144. Throughput: 0: 825.3. Samples: 215830. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:21:50,450][06480] Avg episode reward: [(0, '25.376')] +[2023-02-26 07:21:55,450][06480] Fps is (10 sec: 3278.3, 60 sec: 3481.4, 300 sec: 3338.6). Total num frames: 4890624. Throughput: 0: 816.1. Samples: 220098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:21:55,456][06480] Avg episode reward: [(0, '25.045')] +[2023-02-26 07:21:59,025][30947] Updated weights for policy 0, policy_version 1198 (0.0023) +[2023-02-26 07:22:00,446][06480] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3352.7). Total num frames: 4911104. Throughput: 0: 880.8. Samples: 226860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:22:00,449][06480] Avg episode reward: [(0, '25.060')] +[2023-02-26 07:22:05,446][06480] Fps is (10 sec: 4097.4, 60 sec: 3413.4, 300 sec: 3366.2). Total num frames: 4931584. Throughput: 0: 909.2. Samples: 230262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:22:05,450][06480] Avg episode reward: [(0, '23.819')] +[2023-02-26 07:22:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3350.0). Total num frames: 4943872. Throughput: 0: 903.5. Samples: 234948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:22:10,449][06480] Avg episode reward: [(0, '24.578')] +[2023-02-26 07:22:10,531][30947] Updated weights for policy 0, policy_version 1208 (0.0011) +[2023-02-26 07:22:15,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3363.0). Total num frames: 4964352. Throughput: 0: 917.1. Samples: 239872. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:22:15,449][06480] Avg episode reward: [(0, '23.869')] +[2023-02-26 07:22:20,447][06480] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3375.7). Total num frames: 4984832. Throughput: 0: 942.6. Samples: 243228. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:22:20,458][06480] Avg episode reward: [(0, '23.727')] +[2023-02-26 07:22:20,762][30947] Updated weights for policy 0, policy_version 1218 (0.0016) +[2023-02-26 07:22:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3387.9). Total num frames: 5005312. Throughput: 0: 947.9. Samples: 249882. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:22:25,453][06480] Avg episode reward: [(0, '23.303')] +[2023-02-26 07:22:30,446][06480] Fps is (10 sec: 3277.0, 60 sec: 3686.4, 300 sec: 3429.5). Total num frames: 5017600. Throughput: 0: 900.2. Samples: 254198. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:22:30,453][06480] Avg episode reward: [(0, '23.259')] +[2023-02-26 07:22:33,445][30947] Updated weights for policy 0, policy_version 1228 (0.0015) +[2023-02-26 07:22:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3499.0). Total num frames: 5038080. Throughput: 0: 900.7. Samples: 256360. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:22:35,451][06480] Avg episode reward: [(0, '23.555')] +[2023-02-26 07:22:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 5058560. Throughput: 0: 957.1. Samples: 263166. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:22:40,453][06480] Avg episode reward: [(0, '23.870')] +[2023-02-26 07:22:42,103][30947] Updated weights for policy 0, policy_version 1238 (0.0024) +[2023-02-26 07:22:45,453][06480] Fps is (10 sec: 4093.3, 60 sec: 3686.5, 300 sec: 3568.3). Total num frames: 5079040. Throughput: 0: 943.7. Samples: 269332. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:22:45,464][06480] Avg episode reward: [(0, '23.773')] +[2023-02-26 07:22:50,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 5095424. Throughput: 0: 915.9. Samples: 271476. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:22:50,454][06480] Avg episode reward: [(0, '23.686')] +[2023-02-26 07:22:54,557][30947] Updated weights for policy 0, policy_version 1248 (0.0021) +[2023-02-26 07:22:55,446][06480] Fps is (10 sec: 3279.0, 60 sec: 3686.6, 300 sec: 3554.5). Total num frames: 5111808. Throughput: 0: 919.6. Samples: 276332. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:22:55,449][06480] Avg episode reward: [(0, '23.833')] +[2023-02-26 07:23:00,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 5136384. Throughput: 0: 963.0. Samples: 283206. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:00,449][06480] Avg episode reward: [(0, '23.801')] +[2023-02-26 07:23:04,153][30947] Updated weights for policy 0, policy_version 1258 (0.0012) +[2023-02-26 07:23:05,448][06480] Fps is (10 sec: 4095.2, 60 sec: 3686.3, 300 sec: 3568.4). Total num frames: 5152768. Throughput: 0: 960.0. Samples: 286428. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:05,455][06480] Avg episode reward: [(0, '23.005')] +[2023-02-26 07:23:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 5169152. Throughput: 0: 908.0. Samples: 290740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:23:10,454][06480] Avg episode reward: [(0, '22.873')] +[2023-02-26 07:23:10,468][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001262_5169152.pth... +[2023-02-26 07:23:10,687][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001051_4304896.pth +[2023-02-26 07:23:15,446][06480] Fps is (10 sec: 3277.5, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 5185536. Throughput: 0: 927.2. Samples: 295922. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:23:15,449][06480] Avg episode reward: [(0, '23.774')] +[2023-02-26 07:23:16,487][30947] Updated weights for policy 0, policy_version 1268 (0.0012) +[2023-02-26 07:23:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3554.6). Total num frames: 5210112. Throughput: 0: 953.0. Samples: 299246. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:20,454][06480] Avg episode reward: [(0, '24.114')] +[2023-02-26 07:23:25,450][06480] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 5226496. Throughput: 0: 937.9. Samples: 305372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:25,452][06480] Avg episode reward: [(0, '25.747')] +[2023-02-26 07:23:27,330][30947] Updated weights for policy 0, policy_version 1278 (0.0012) +[2023-02-26 07:23:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 5242880. Throughput: 0: 897.5. Samples: 309714. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:23:30,452][06480] Avg episode reward: [(0, '24.370')] +[2023-02-26 07:23:35,446][06480] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5259264. Throughput: 0: 900.3. Samples: 311988. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:23:35,453][06480] Avg episode reward: [(0, '23.740')] +[2023-02-26 07:23:38,207][30947] Updated weights for policy 0, policy_version 1288 (0.0016) +[2023-02-26 07:23:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 5283840. Throughput: 0: 947.5. Samples: 318968. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:23:40,454][06480] Avg episode reward: [(0, '22.615')] +[2023-02-26 07:23:45,448][06480] Fps is (10 sec: 4504.7, 60 sec: 3755.0, 300 sec: 3637.8). Total num frames: 5304320. Throughput: 0: 926.0. Samples: 324878. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:45,452][06480] Avg episode reward: [(0, '22.077')] +[2023-02-26 07:23:49,623][30947] Updated weights for policy 0, policy_version 1298 (0.0013) +[2023-02-26 07:23:50,448][06480] Fps is (10 sec: 3276.3, 60 sec: 3686.3, 300 sec: 3610.0). Total num frames: 5316608. Throughput: 0: 902.2. Samples: 327026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:23:50,455][06480] Avg episode reward: [(0, '21.586')] +[2023-02-26 07:23:55,449][06480] Fps is (10 sec: 2457.4, 60 sec: 3618.0, 300 sec: 3582.2). Total num frames: 5328896. Throughput: 0: 889.8. Samples: 330784. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:23:55,452][06480] Avg episode reward: [(0, '21.124')] +[2023-02-26 07:24:00,446][06480] Fps is (10 sec: 2867.6, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 5345280. Throughput: 0: 872.2. Samples: 335172. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:24:00,449][06480] Avg episode reward: [(0, '21.742')] +[2023-02-26 07:24:03,308][30947] Updated weights for policy 0, policy_version 1308 (0.0028) +[2023-02-26 07:24:05,450][06480] Fps is (10 sec: 3276.5, 60 sec: 3481.5, 300 sec: 3582.2). Total num frames: 5361664. Throughput: 0: 864.0. Samples: 338130. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:24:05,457][06480] Avg episode reward: [(0, '22.705')] +[2023-02-26 07:24:10,448][06480] Fps is (10 sec: 3276.1, 60 sec: 3481.5, 300 sec: 3568.4). Total num frames: 5378048. Throughput: 0: 828.3. Samples: 342646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:24:10,456][06480] Avg episode reward: [(0, '22.505')] +[2023-02-26 07:24:15,446][06480] Fps is (10 sec: 3278.0, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 5394432. Throughput: 0: 843.1. Samples: 347652. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:24:15,453][06480] Avg episode reward: [(0, '23.217')] +[2023-02-26 07:24:15,933][30947] Updated weights for policy 0, policy_version 1318 (0.0022) +[2023-02-26 07:24:20,446][06480] Fps is (10 sec: 3687.1, 60 sec: 3413.3, 300 sec: 3554.5). Total num frames: 5414912. Throughput: 0: 866.6. Samples: 350984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:24:20,452][06480] Avg episode reward: [(0, '24.884')] +[2023-02-26 07:24:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 5435392. Throughput: 0: 855.4. Samples: 357462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:24:25,449][06480] Avg episode reward: [(0, '24.387')] +[2023-02-26 07:24:26,351][30947] Updated weights for policy 0, policy_version 1328 (0.0020) +[2023-02-26 07:24:30,451][06480] Fps is (10 sec: 3275.1, 60 sec: 3413.0, 300 sec: 3554.4). Total num frames: 5447680. Throughput: 0: 819.2. Samples: 361744. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:24:30,458][06480] Avg episode reward: [(0, '24.330')] +[2023-02-26 07:24:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 5468160. Throughput: 0: 819.1. Samples: 363882. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:24:35,453][06480] Avg episode reward: [(0, '24.145')] +[2023-02-26 07:24:37,952][30947] Updated weights for policy 0, policy_version 1338 (0.0014) +[2023-02-26 07:24:40,446][06480] Fps is (10 sec: 4098.1, 60 sec: 3413.3, 300 sec: 3554.5). Total num frames: 5488640. Throughput: 0: 880.6. Samples: 370408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:24:40,450][06480] Avg episode reward: [(0, '23.669')] +[2023-02-26 07:24:45,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3582.3). Total num frames: 5509120. Throughput: 0: 917.9. Samples: 376478. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:24:45,452][06480] Avg episode reward: [(0, '23.690')] +[2023-02-26 07:24:49,148][30947] Updated weights for policy 0, policy_version 1348 (0.0030) +[2023-02-26 07:24:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.4, 300 sec: 3554.5). Total num frames: 5521408. Throughput: 0: 899.8. Samples: 378616. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:24:50,451][06480] Avg episode reward: [(0, '23.215')] +[2023-02-26 07:24:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3554.5). Total num frames: 5541888. Throughput: 0: 905.9. Samples: 383410. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:24:55,448][06480] Avg episode reward: [(0, '24.228')] +[2023-02-26 07:24:59,483][30947] Updated weights for policy 0, policy_version 1358 (0.0013) +[2023-02-26 07:25:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5566464. Throughput: 0: 947.1. Samples: 390270. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:25:00,449][06480] Avg episode reward: [(0, '25.126')] +[2023-02-26 07:25:05,450][06480] Fps is (10 sec: 4094.5, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 5582848. Throughput: 0: 944.2. Samples: 393476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:25:05,455][06480] Avg episode reward: [(0, '25.420')] +[2023-02-26 07:25:10,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3618.3, 300 sec: 3568.4). Total num frames: 5595136. Throughput: 0: 895.0. Samples: 397738. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:25:10,449][06480] Avg episode reward: [(0, '25.399')] +[2023-02-26 07:25:10,458][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001366_5595136.pth... +[2023-02-26 07:25:10,657][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001157_4739072.pth +[2023-02-26 07:25:12,264][30947] Updated weights for policy 0, policy_version 1368 (0.0026) +[2023-02-26 07:25:15,446][06480] Fps is (10 sec: 3278.0, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 5615616. Throughput: 0: 916.2. Samples: 402968. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:25:15,448][06480] Avg episode reward: [(0, '24.072')] +[2023-02-26 07:25:20,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3596.1). Total num frames: 5640192. Throughput: 0: 944.3. Samples: 406376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:25:20,449][06480] Avg episode reward: [(0, '22.563')] +[2023-02-26 07:25:21,384][30947] Updated weights for policy 0, policy_version 1378 (0.0012) +[2023-02-26 07:25:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 5656576. Throughput: 0: 938.4. Samples: 412638. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:25:25,449][06480] Avg episode reward: [(0, '21.814')] +[2023-02-26 07:25:30,448][06480] Fps is (10 sec: 3276.2, 60 sec: 3754.9, 300 sec: 3596.1). Total num frames: 5672960. Throughput: 0: 901.0. Samples: 417026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:25:30,451][06480] Avg episode reward: [(0, '21.538')] +[2023-02-26 07:25:33,854][30947] Updated weights for policy 0, policy_version 1388 (0.0022) +[2023-02-26 07:25:35,446][06480] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5689344. Throughput: 0: 906.9. Samples: 419428. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:25:35,449][06480] Avg episode reward: [(0, '21.328')] +[2023-02-26 07:25:40,446][06480] Fps is (10 sec: 4096.7, 60 sec: 3754.7, 300 sec: 3596.2). Total num frames: 5713920. Throughput: 0: 951.2. Samples: 426214. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:25:40,449][06480] Avg episode reward: [(0, '22.251')] +[2023-02-26 07:25:43,099][30947] Updated weights for policy 0, policy_version 1398 (0.0012) +[2023-02-26 07:25:45,446][06480] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 5730304. Throughput: 0: 927.4. Samples: 432004. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:25:45,450][06480] Avg episode reward: [(0, '20.666')] +[2023-02-26 07:25:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 5746688. Throughput: 0: 902.6. Samples: 434088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:25:50,449][06480] Avg episode reward: [(0, '20.609')] +[2023-02-26 07:25:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5763072. Throughput: 0: 923.7. Samples: 439304. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:25:55,456][06480] Avg episode reward: [(0, '21.633')] +[2023-02-26 07:25:55,483][30947] Updated weights for policy 0, policy_version 1408 (0.0025) +[2023-02-26 07:26:00,447][06480] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 5787648. Throughput: 0: 959.2. Samples: 446132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:26:00,453][06480] Avg episode reward: [(0, '21.098')] +[2023-02-26 07:26:05,451][06480] Fps is (10 sec: 4094.2, 60 sec: 3686.3, 300 sec: 3610.0). Total num frames: 5804032. Throughput: 0: 949.6. Samples: 449112. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:26:05,453][06480] Avg episode reward: [(0, '21.543')] +[2023-02-26 07:26:05,791][30947] Updated weights for policy 0, policy_version 1418 (0.0019) +[2023-02-26 07:26:10,447][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.6, 300 sec: 3623.9). Total num frames: 5820416. Throughput: 0: 905.9. Samples: 453406. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:26:10,454][06480] Avg episode reward: [(0, '20.768')] +[2023-02-26 07:26:15,446][06480] Fps is (10 sec: 3688.1, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5840896. Throughput: 0: 933.2. Samples: 459020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:26:15,452][06480] Avg episode reward: [(0, '23.128')] +[2023-02-26 07:26:17,051][30947] Updated weights for policy 0, policy_version 1428 (0.0011) +[2023-02-26 07:26:20,446][06480] Fps is (10 sec: 4096.2, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5861376. Throughput: 0: 955.0. Samples: 462402. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:26:20,452][06480] Avg episode reward: [(0, '24.516')] +[2023-02-26 07:26:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5877760. Throughput: 0: 937.4. Samples: 468398. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:26:25,452][06480] Avg episode reward: [(0, '24.399')] +[2023-02-26 07:26:28,354][30947] Updated weights for policy 0, policy_version 1438 (0.0012) +[2023-02-26 07:26:30,447][06480] Fps is (10 sec: 3276.7, 60 sec: 3686.5, 300 sec: 3665.6). Total num frames: 5894144. Throughput: 0: 902.5. Samples: 472616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:26:30,452][06480] Avg episode reward: [(0, '24.210')] +[2023-02-26 07:26:35,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 5906432. Throughput: 0: 893.8. Samples: 474310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:26:35,454][06480] Avg episode reward: [(0, '24.118')] +[2023-02-26 07:26:40,446][06480] Fps is (10 sec: 2457.7, 60 sec: 3413.3, 300 sec: 3596.2). Total num frames: 5918720. Throughput: 0: 870.7. Samples: 478486. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:26:40,454][06480] Avg episode reward: [(0, '24.380')] +[2023-02-26 07:26:42,354][30947] Updated weights for policy 0, policy_version 1448 (0.0030) +[2023-02-26 07:26:45,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 5939200. Throughput: 0: 839.0. Samples: 483888. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:26:45,451][06480] Avg episode reward: [(0, '24.512')] +[2023-02-26 07:26:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3596.2). Total num frames: 5951488. Throughput: 0: 820.6. Samples: 486034. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:26:50,455][06480] Avg episode reward: [(0, '24.582')] +[2023-02-26 07:26:55,038][30947] Updated weights for policy 0, policy_version 1458 (0.0022) +[2023-02-26 07:26:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3596.1). Total num frames: 5971968. Throughput: 0: 835.3. Samples: 490992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:26:55,453][06480] Avg episode reward: [(0, '24.055')] +[2023-02-26 07:27:00,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3596.1). Total num frames: 5992448. Throughput: 0: 861.2. Samples: 497772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:00,454][06480] Avg episode reward: [(0, '23.510')] +[2023-02-26 07:27:04,513][30947] Updated weights for policy 0, policy_version 1468 (0.0016) +[2023-02-26 07:27:05,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3481.9, 300 sec: 3623.9). Total num frames: 6012928. Throughput: 0: 861.0. Samples: 501146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:05,455][06480] Avg episode reward: [(0, '24.418')] +[2023-02-26 07:27:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.4, 300 sec: 3596.1). Total num frames: 6025216. Throughput: 0: 821.7. Samples: 505374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:27:10,454][06480] Avg episode reward: [(0, '24.669')] +[2023-02-26 07:27:10,470][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001471_6025216.pth... +[2023-02-26 07:27:10,659][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001262_5169152.pth +[2023-02-26 07:27:15,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3596.2). Total num frames: 6045696. Throughput: 0: 843.4. Samples: 510570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:15,454][06480] Avg episode reward: [(0, '24.484')] +[2023-02-26 07:27:16,812][30947] Updated weights for policy 0, policy_version 1478 (0.0019) +[2023-02-26 07:27:20,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 6070272. Throughput: 0: 881.6. Samples: 513984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:20,450][06480] Avg episode reward: [(0, '25.618')] +[2023-02-26 07:27:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 6086656. Throughput: 0: 924.3. Samples: 520080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:25,453][06480] Avg episode reward: [(0, '25.184')] +[2023-02-26 07:27:27,725][30947] Updated weights for policy 0, policy_version 1488 (0.0016) +[2023-02-26 07:27:30,447][06480] Fps is (10 sec: 2866.9, 60 sec: 3413.3, 300 sec: 3596.1). Total num frames: 6098944. Throughput: 0: 899.8. Samples: 524380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:27:30,455][06480] Avg episode reward: [(0, '24.885')] +[2023-02-26 07:27:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3596.1). Total num frames: 6119424. Throughput: 0: 910.0. Samples: 526982. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:27:35,454][06480] Avg episode reward: [(0, '24.489')] +[2023-02-26 07:27:38,427][30947] Updated weights for policy 0, policy_version 1498 (0.0011) +[2023-02-26 07:27:40,446][06480] Fps is (10 sec: 4506.0, 60 sec: 3754.7, 300 sec: 3610.1). Total num frames: 6144000. Throughput: 0: 951.0. Samples: 533788. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:27:40,449][06480] Avg episode reward: [(0, '24.006')] +[2023-02-26 07:27:45,448][06480] Fps is (10 sec: 4095.3, 60 sec: 3686.3, 300 sec: 3610.0). Total num frames: 6160384. Throughput: 0: 925.9. Samples: 539440. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:27:45,452][06480] Avg episode reward: [(0, '23.315')] +[2023-02-26 07:27:50,244][30947] Updated weights for policy 0, policy_version 1508 (0.0011) +[2023-02-26 07:27:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 6176768. Throughput: 0: 898.1. Samples: 541562. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:27:50,457][06480] Avg episode reward: [(0, '22.595')] +[2023-02-26 07:27:55,446][06480] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3596.1). Total num frames: 6197248. Throughput: 0: 923.9. Samples: 546948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:27:55,453][06480] Avg episode reward: [(0, '22.399')] +[2023-02-26 07:27:59,799][30947] Updated weights for policy 0, policy_version 1518 (0.0016) +[2023-02-26 07:28:00,446][06480] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3610.1). Total num frames: 6217728. Throughput: 0: 960.8. Samples: 553806. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:28:00,452][06480] Avg episode reward: [(0, '21.534')] +[2023-02-26 07:28:05,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 6234112. Throughput: 0: 947.7. Samples: 556630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:28:05,454][06480] Avg episode reward: [(0, '21.394')] +[2023-02-26 07:28:10,447][06480] Fps is (10 sec: 3276.6, 60 sec: 3754.6, 300 sec: 3610.0). Total num frames: 6250496. Throughput: 0: 907.1. Samples: 560902. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-26 07:28:10,450][06480] Avg episode reward: [(0, '20.759')] +[2023-02-26 07:28:12,416][30947] Updated weights for policy 0, policy_version 1528 (0.0027) +[2023-02-26 07:28:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3596.2). Total num frames: 6270976. Throughput: 0: 941.6. Samples: 566752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:28:15,452][06480] Avg episode reward: [(0, '21.422')] +[2023-02-26 07:28:20,446][06480] Fps is (10 sec: 4096.2, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 6291456. Throughput: 0: 958.8. Samples: 570130. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:28:20,452][06480] Avg episode reward: [(0, '22.432')] +[2023-02-26 07:28:21,528][30947] Updated weights for policy 0, policy_version 1538 (0.0016) +[2023-02-26 07:28:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 6307840. Throughput: 0: 934.9. Samples: 575858. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:28:25,451][06480] Avg episode reward: [(0, '21.955')] +[2023-02-26 07:28:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 6324224. Throughput: 0: 903.9. Samples: 580114. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:28:30,458][06480] Avg episode reward: [(0, '22.209')] +[2023-02-26 07:28:33,957][30947] Updated weights for policy 0, policy_version 1548 (0.0026) +[2023-02-26 07:28:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3596.1). Total num frames: 6344704. Throughput: 0: 924.3. Samples: 583156. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:28:35,449][06480] Avg episode reward: [(0, '20.356')] +[2023-02-26 07:28:40,446][06480] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3610.1). Total num frames: 6369280. Throughput: 0: 953.6. Samples: 589862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:28:40,448][06480] Avg episode reward: [(0, '20.112')] +[2023-02-26 07:28:44,523][30947] Updated weights for policy 0, policy_version 1558 (0.0013) +[2023-02-26 07:28:45,446][06480] Fps is (10 sec: 3686.3, 60 sec: 3686.5, 300 sec: 3610.1). Total num frames: 6381568. Throughput: 0: 912.2. Samples: 594854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:28:45,448][06480] Avg episode reward: [(0, '20.062')] +[2023-02-26 07:28:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3624.0). Total num frames: 6397952. Throughput: 0: 896.0. Samples: 596952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:28:50,453][06480] Avg episode reward: [(0, '20.393')] +[2023-02-26 07:28:55,446][06480] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 6418432. Throughput: 0: 929.6. Samples: 602734. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:28:55,449][06480] Avg episode reward: [(0, '22.322')] +[2023-02-26 07:28:55,706][30947] Updated weights for policy 0, policy_version 1568 (0.0026) +[2023-02-26 07:29:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 6443008. Throughput: 0: 950.9. Samples: 609542. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:00,452][06480] Avg episode reward: [(0, '22.021')] +[2023-02-26 07:29:05,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 6455296. Throughput: 0: 929.5. Samples: 611956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:05,449][06480] Avg episode reward: [(0, '23.273')] +[2023-02-26 07:29:07,436][30947] Updated weights for policy 0, policy_version 1578 (0.0014) +[2023-02-26 07:29:10,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3618.2, 300 sec: 3637.8). Total num frames: 6467584. Throughput: 0: 892.5. Samples: 616020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:29:10,449][06480] Avg episode reward: [(0, '22.623')] +[2023-02-26 07:29:10,467][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001579_6467584.pth... +[2023-02-26 07:29:10,733][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001366_5595136.pth +[2023-02-26 07:29:15,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3623.9). Total num frames: 6483968. Throughput: 0: 884.0. Samples: 619896. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:29:15,449][06480] Avg episode reward: [(0, '22.691')] +[2023-02-26 07:29:20,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 6500352. Throughput: 0: 863.6. Samples: 622016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:29:20,454][06480] Avg episode reward: [(0, '21.632')] +[2023-02-26 07:29:21,000][30947] Updated weights for policy 0, policy_version 1588 (0.0015) +[2023-02-26 07:29:25,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3624.0). Total num frames: 6516736. Throughput: 0: 841.2. Samples: 627718. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:25,452][06480] Avg episode reward: [(0, '21.905')] +[2023-02-26 07:29:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 6533120. Throughput: 0: 825.9. Samples: 632018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:30,448][06480] Avg episode reward: [(0, '22.487')] +[2023-02-26 07:29:33,534][30947] Updated weights for policy 0, policy_version 1598 (0.0014) +[2023-02-26 07:29:35,446][06480] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 6553600. Throughput: 0: 841.1. Samples: 634802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:35,448][06480] Avg episode reward: [(0, '22.063')] +[2023-02-26 07:29:40,446][06480] Fps is (10 sec: 4095.9, 60 sec: 3413.3, 300 sec: 3610.0). Total num frames: 6574080. Throughput: 0: 860.6. Samples: 641462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:29:40,448][06480] Avg episode reward: [(0, '24.196')] +[2023-02-26 07:29:43,418][30947] Updated weights for policy 0, policy_version 1608 (0.0012) +[2023-02-26 07:29:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 6590464. Throughput: 0: 828.0. Samples: 646804. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:29:45,453][06480] Avg episode reward: [(0, '24.558')] +[2023-02-26 07:29:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3596.1). Total num frames: 6602752. Throughput: 0: 819.9. Samples: 648850. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:29:50,455][06480] Avg episode reward: [(0, '24.989')] +[2023-02-26 07:29:55,288][30947] Updated weights for policy 0, policy_version 1618 (0.0018) +[2023-02-26 07:29:55,447][06480] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3596.1). Total num frames: 6627328. Throughput: 0: 853.1. Samples: 654412. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:29:55,454][06480] Avg episode reward: [(0, '24.401')] +[2023-02-26 07:30:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3413.3, 300 sec: 3610.1). Total num frames: 6647808. Throughput: 0: 920.6. Samples: 661322. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:30:00,449][06480] Avg episode reward: [(0, '25.400')] +[2023-02-26 07:30:05,446][06480] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 6664192. Throughput: 0: 933.2. Samples: 664010. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:30:05,464][06480] Avg episode reward: [(0, '25.448')] +[2023-02-26 07:30:06,043][30947] Updated weights for policy 0, policy_version 1628 (0.0011) +[2023-02-26 07:30:10,447][06480] Fps is (10 sec: 3276.6, 60 sec: 3549.8, 300 sec: 3610.0). Total num frames: 6680576. Throughput: 0: 901.1. Samples: 668266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:30:10,450][06480] Avg episode reward: [(0, '24.827')] +[2023-02-26 07:30:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 6701056. Throughput: 0: 939.1. Samples: 674276. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:30:15,452][06480] Avg episode reward: [(0, '24.071')] +[2023-02-26 07:30:17,002][30947] Updated weights for policy 0, policy_version 1638 (0.0015) +[2023-02-26 07:30:20,446][06480] Fps is (10 sec: 4505.9, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 6725632. Throughput: 0: 953.5. Samples: 677710. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:30:20,452][06480] Avg episode reward: [(0, '25.094')] +[2023-02-26 07:30:25,448][06480] Fps is (10 sec: 4095.3, 60 sec: 3754.6, 300 sec: 3623.9). Total num frames: 6742016. Throughput: 0: 933.3. Samples: 683462. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:30:25,454][06480] Avg episode reward: [(0, '23.794')] +[2023-02-26 07:30:28,194][30947] Updated weights for policy 0, policy_version 1648 (0.0013) +[2023-02-26 07:30:30,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 6754304. Throughput: 0: 911.7. Samples: 687832. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:30:30,449][06480] Avg episode reward: [(0, '23.786')] +[2023-02-26 07:30:35,446][06480] Fps is (10 sec: 3277.4, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 6774784. Throughput: 0: 931.5. Samples: 690766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:30:35,453][06480] Avg episode reward: [(0, '24.316')] +[2023-02-26 07:30:38,291][30947] Updated weights for policy 0, policy_version 1658 (0.0025) +[2023-02-26 07:30:40,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 6799360. Throughput: 0: 964.9. Samples: 697834. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:30:40,453][06480] Avg episode reward: [(0, '23.834')] +[2023-02-26 07:30:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 6811648. Throughput: 0: 922.9. Samples: 702854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:30:45,452][06480] Avg episode reward: [(0, '24.319')] +[2023-02-26 07:30:50,450][06480] Fps is (10 sec: 2866.1, 60 sec: 3754.4, 300 sec: 3610.0). Total num frames: 6828032. Throughput: 0: 912.8. Samples: 705090. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:30:50,459][06480] Avg episode reward: [(0, '23.748')] +[2023-02-26 07:30:50,883][30947] Updated weights for policy 0, policy_version 1668 (0.0030) +[2023-02-26 07:30:55,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 6852608. Throughput: 0: 950.2. Samples: 711026. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:30:55,451][06480] Avg episode reward: [(0, '23.870')] +[2023-02-26 07:30:59,473][30947] Updated weights for policy 0, policy_version 1678 (0.0011) +[2023-02-26 07:31:00,446][06480] Fps is (10 sec: 4917.0, 60 sec: 3822.9, 300 sec: 3637.9). Total num frames: 6877184. Throughput: 0: 970.4. Samples: 717942. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:31:00,453][06480] Avg episode reward: [(0, '23.148')] +[2023-02-26 07:31:05,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 6889472. Throughput: 0: 948.1. Samples: 720376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:31:05,455][06480] Avg episode reward: [(0, '23.203')] +[2023-02-26 07:31:10,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 6905856. Throughput: 0: 914.6. Samples: 724616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:31:10,448][06480] Avg episode reward: [(0, '24.213')] +[2023-02-26 07:31:10,461][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001686_6905856.pth... +[2023-02-26 07:31:10,619][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001471_6025216.pth +[2023-02-26 07:31:12,347][30947] Updated weights for policy 0, policy_version 1688 (0.0022) +[2023-02-26 07:31:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 6926336. Throughput: 0: 950.0. Samples: 730582. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:31:15,449][06480] Avg episode reward: [(0, '23.063')] +[2023-02-26 07:31:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 6946816. Throughput: 0: 960.9. Samples: 734006. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:31:20,455][06480] Avg episode reward: [(0, '23.145')] +[2023-02-26 07:31:22,061][30947] Updated weights for policy 0, policy_version 1698 (0.0015) +[2023-02-26 07:31:25,448][06480] Fps is (10 sec: 3685.8, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 6963200. Throughput: 0: 920.2. Samples: 739244. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:31:25,450][06480] Avg episode reward: [(0, '23.530')] +[2023-02-26 07:31:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 6979584. Throughput: 0: 907.8. Samples: 743706. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:31:30,454][06480] Avg episode reward: [(0, '24.687')] +[2023-02-26 07:31:33,867][30947] Updated weights for policy 0, policy_version 1708 (0.0017) +[2023-02-26 07:31:35,446][06480] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 7000064. Throughput: 0: 935.3. Samples: 747174. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:31:35,449][06480] Avg episode reward: [(0, '23.460')] +[2023-02-26 07:31:40,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7024640. Throughput: 0: 958.9. Samples: 754178. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:31:40,449][06480] Avg episode reward: [(0, '21.934')] +[2023-02-26 07:31:44,366][30947] Updated weights for policy 0, policy_version 1718 (0.0011) +[2023-02-26 07:31:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7036928. Throughput: 0: 907.4. Samples: 758776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:31:45,456][06480] Avg episode reward: [(0, '23.191')] +[2023-02-26 07:31:50,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3686.6, 300 sec: 3651.7). Total num frames: 7049216. Throughput: 0: 894.6. Samples: 760632. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:31:50,450][06480] Avg episode reward: [(0, '23.413')] +[2023-02-26 07:31:55,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 7065600. Throughput: 0: 886.4. Samples: 764506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:31:55,453][06480] Avg episode reward: [(0, '22.811')] +[2023-02-26 07:31:58,702][30947] Updated weights for policy 0, policy_version 1728 (0.0023) +[2023-02-26 07:32:00,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3623.9). Total num frames: 7081984. Throughput: 0: 877.3. Samples: 770062. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:00,448][06480] Avg episode reward: [(0, '22.600')] +[2023-02-26 07:32:05,450][06480] Fps is (10 sec: 3685.1, 60 sec: 3549.7, 300 sec: 3651.6). Total num frames: 7102464. Throughput: 0: 866.7. Samples: 773012. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:32:05,453][06480] Avg episode reward: [(0, '22.817')] +[2023-02-26 07:32:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 7114752. Throughput: 0: 848.1. Samples: 777406. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:10,452][06480] Avg episode reward: [(0, '23.679')] +[2023-02-26 07:32:11,041][30947] Updated weights for policy 0, policy_version 1738 (0.0027) +[2023-02-26 07:32:15,446][06480] Fps is (10 sec: 3278.0, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 7135232. Throughput: 0: 874.4. Samples: 783056. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:15,451][06480] Avg episode reward: [(0, '22.871')] +[2023-02-26 07:32:19,866][30947] Updated weights for policy 0, policy_version 1748 (0.0014) +[2023-02-26 07:32:20,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 7159808. Throughput: 0: 878.3. Samples: 786696. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:20,449][06480] Avg episode reward: [(0, '24.448')] +[2023-02-26 07:32:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3550.0, 300 sec: 3651.7). Total num frames: 7176192. Throughput: 0: 859.7. Samples: 792864. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:25,450][06480] Avg episode reward: [(0, '23.525')] +[2023-02-26 07:32:30,449][06480] Fps is (10 sec: 3276.0, 60 sec: 3549.7, 300 sec: 3637.8). Total num frames: 7192576. Throughput: 0: 853.3. Samples: 797176. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:30,454][06480] Avg episode reward: [(0, '23.898')] +[2023-02-26 07:32:32,376][30947] Updated weights for policy 0, policy_version 1758 (0.0020) +[2023-02-26 07:32:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3623.9). Total num frames: 7213056. Throughput: 0: 877.6. Samples: 800126. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:32:35,454][06480] Avg episode reward: [(0, '24.703')] +[2023-02-26 07:32:40,446][06480] Fps is (10 sec: 4506.6, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 7237632. Throughput: 0: 947.5. Samples: 807144. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:32:40,453][06480] Avg episode reward: [(0, '25.725')] +[2023-02-26 07:32:41,040][30947] Updated weights for policy 0, policy_version 1768 (0.0013) +[2023-02-26 07:32:45,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 7254016. Throughput: 0: 939.9. Samples: 812358. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:32:45,455][06480] Avg episode reward: [(0, '26.374')] +[2023-02-26 07:32:50,447][06480] Fps is (10 sec: 2867.1, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 7266304. Throughput: 0: 921.1. Samples: 814458. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:32:50,448][06480] Avg episode reward: [(0, '24.860')] +[2023-02-26 07:32:53,836][30947] Updated weights for policy 0, policy_version 1778 (0.0019) +[2023-02-26 07:32:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 7286784. Throughput: 0: 947.1. Samples: 820026. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:32:55,449][06480] Avg episode reward: [(0, '24.313')] +[2023-02-26 07:33:00,447][06480] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3651.7). Total num frames: 7311360. Throughput: 0: 977.9. Samples: 827060. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:33:00,454][06480] Avg episode reward: [(0, '24.487')] +[2023-02-26 07:33:03,378][30947] Updated weights for policy 0, policy_version 1788 (0.0018) +[2023-02-26 07:33:05,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.9, 300 sec: 3651.7). Total num frames: 7327744. Throughput: 0: 956.2. Samples: 829726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:33:05,448][06480] Avg episode reward: [(0, '23.168')] +[2023-02-26 07:33:10,446][06480] Fps is (10 sec: 3276.9, 60 sec: 3822.9, 300 sec: 3637.8). Total num frames: 7344128. Throughput: 0: 915.6. Samples: 834064. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:33:10,453][06480] Avg episode reward: [(0, '23.577')] +[2023-02-26 07:33:10,468][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001793_7344128.pth... +[2023-02-26 07:33:10,675][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001579_6467584.pth +[2023-02-26 07:33:15,148][30947] Updated weights for policy 0, policy_version 1798 (0.0019) +[2023-02-26 07:33:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3637.8). Total num frames: 7364608. Throughput: 0: 954.1. Samples: 840110. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:33:15,449][06480] Avg episode reward: [(0, '24.646')] +[2023-02-26 07:33:20,447][06480] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3651.7). Total num frames: 7385088. Throughput: 0: 965.3. Samples: 843564. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:33:20,449][06480] Avg episode reward: [(0, '25.290')] +[2023-02-26 07:33:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 7401472. Throughput: 0: 933.4. Samples: 849146. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:33:25,452][06480] Avg episode reward: [(0, '26.333')] +[2023-02-26 07:33:25,935][30947] Updated weights for policy 0, policy_version 1808 (0.0011) +[2023-02-26 07:33:30,446][06480] Fps is (10 sec: 3276.9, 60 sec: 3754.8, 300 sec: 3637.8). Total num frames: 7417856. Throughput: 0: 914.9. Samples: 853530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:33:30,449][06480] Avg episode reward: [(0, '26.124')] +[2023-02-26 07:33:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 7438336. Throughput: 0: 940.5. Samples: 856782. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:33:35,448][06480] Avg episode reward: [(0, '26.769')] +[2023-02-26 07:33:36,509][30947] Updated weights for policy 0, policy_version 1818 (0.0016) +[2023-02-26 07:33:40,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 7462912. Throughput: 0: 971.6. Samples: 863750. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:33:40,452][06480] Avg episode reward: [(0, '26.870')] +[2023-02-26 07:33:40,469][30933] Saving new best policy, reward=26.870! +[2023-02-26 07:33:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 7475200. Throughput: 0: 921.5. Samples: 868528. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:33:45,451][06480] Avg episode reward: [(0, '26.141')] +[2023-02-26 07:33:48,416][30947] Updated weights for policy 0, policy_version 1828 (0.0034) +[2023-02-26 07:33:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 7491584. Throughput: 0: 909.4. Samples: 870648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:33:50,451][06480] Avg episode reward: [(0, '25.799')] +[2023-02-26 07:33:55,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3637.8). Total num frames: 7516160. Throughput: 0: 947.2. Samples: 876690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:33:55,451][06480] Avg episode reward: [(0, '24.469')] +[2023-02-26 07:33:57,916][30947] Updated weights for policy 0, policy_version 1838 (0.0015) +[2023-02-26 07:34:00,450][06480] Fps is (10 sec: 4504.0, 60 sec: 3754.5, 300 sec: 3665.5). Total num frames: 7536640. Throughput: 0: 968.3. Samples: 883688. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:34:00,452][06480] Avg episode reward: [(0, '24.388')] +[2023-02-26 07:34:05,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7553024. Throughput: 0: 941.3. Samples: 885920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:34:05,453][06480] Avg episode reward: [(0, '25.044')] +[2023-02-26 07:34:10,357][30947] Updated weights for policy 0, policy_version 1848 (0.0019) +[2023-02-26 07:34:10,446][06480] Fps is (10 sec: 3278.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7569408. Throughput: 0: 914.5. Samples: 890300. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:34:10,450][06480] Avg episode reward: [(0, '24.825')] +[2023-02-26 07:34:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 7589888. Throughput: 0: 958.1. Samples: 896644. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:34:15,449][06480] Avg episode reward: [(0, '24.808')] +[2023-02-26 07:34:19,395][30947] Updated weights for policy 0, policy_version 1858 (0.0018) +[2023-02-26 07:34:20,446][06480] Fps is (10 sec: 4505.7, 60 sec: 3823.0, 300 sec: 3721.1). Total num frames: 7614464. Throughput: 0: 962.6. Samples: 900098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:34:20,450][06480] Avg episode reward: [(0, '25.447')] +[2023-02-26 07:34:25,447][06480] Fps is (10 sec: 3686.2, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 7626752. Throughput: 0: 922.1. Samples: 905246. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:34:25,449][06480] Avg episode reward: [(0, '25.745')] +[2023-02-26 07:34:30,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7639040. Throughput: 0: 895.0. Samples: 908802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:34:30,453][06480] Avg episode reward: [(0, '26.017')] +[2023-02-26 07:34:34,676][30947] Updated weights for policy 0, policy_version 1868 (0.0033) +[2023-02-26 07:34:35,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3549.8, 300 sec: 3651.7). Total num frames: 7651328. Throughput: 0: 889.7. Samples: 910684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:34:35,455][06480] Avg episode reward: [(0, '26.570')] +[2023-02-26 07:34:40,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 7671808. Throughput: 0: 876.1. Samples: 916116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:34:40,449][06480] Avg episode reward: [(0, '24.630')] +[2023-02-26 07:34:45,449][06480] Fps is (10 sec: 3685.5, 60 sec: 3549.7, 300 sec: 3679.4). Total num frames: 7688192. Throughput: 0: 842.8. Samples: 921614. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:34:45,452][06480] Avg episode reward: [(0, '25.597')] +[2023-02-26 07:34:45,757][30947] Updated weights for policy 0, policy_version 1878 (0.0017) +[2023-02-26 07:34:50,448][06480] Fps is (10 sec: 3276.3, 60 sec: 3549.8, 300 sec: 3651.7). Total num frames: 7704576. Throughput: 0: 837.3. Samples: 923600. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:34:50,451][06480] Avg episode reward: [(0, '26.227')] +[2023-02-26 07:34:55,446][06480] Fps is (10 sec: 3687.4, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 7725056. Throughput: 0: 859.7. Samples: 928986. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:34:55,449][06480] Avg episode reward: [(0, '26.141')] +[2023-02-26 07:34:56,928][30947] Updated weights for policy 0, policy_version 1888 (0.0021) +[2023-02-26 07:35:00,446][06480] Fps is (10 sec: 4096.6, 60 sec: 3481.8, 300 sec: 3665.6). Total num frames: 7745536. Throughput: 0: 873.8. Samples: 935964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:00,448][06480] Avg episode reward: [(0, '26.378')] +[2023-02-26 07:35:05,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 7766016. Throughput: 0: 863.2. Samples: 938944. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:35:05,450][06480] Avg episode reward: [(0, '25.759')] +[2023-02-26 07:35:07,922][30947] Updated weights for policy 0, policy_version 1898 (0.0016) +[2023-02-26 07:35:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 7778304. Throughput: 0: 846.8. Samples: 943350. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:35:10,457][06480] Avg episode reward: [(0, '26.604')] +[2023-02-26 07:35:10,469][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001899_7778304.pth... +[2023-02-26 07:35:10,687][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001686_6905856.pth +[2023-02-26 07:35:15,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 7798784. Throughput: 0: 891.8. Samples: 948934. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:15,448][06480] Avg episode reward: [(0, '26.011')] +[2023-02-26 07:35:18,541][30947] Updated weights for policy 0, policy_version 1908 (0.0015) +[2023-02-26 07:35:20,446][06480] Fps is (10 sec: 4505.5, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 7823360. Throughput: 0: 925.0. Samples: 952310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:35:20,454][06480] Avg episode reward: [(0, '24.774')] +[2023-02-26 07:35:25,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 7839744. Throughput: 0: 935.0. Samples: 958192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:35:25,449][06480] Avg episode reward: [(0, '24.464')] +[2023-02-26 07:35:30,448][06480] Fps is (10 sec: 2866.8, 60 sec: 3549.8, 300 sec: 3651.7). Total num frames: 7852032. Throughput: 0: 909.6. Samples: 962544. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:30,452][06480] Avg episode reward: [(0, '24.578')] +[2023-02-26 07:35:30,908][30947] Updated weights for policy 0, policy_version 1918 (0.0019) +[2023-02-26 07:35:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 7876608. Throughput: 0: 931.3. Samples: 965508. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:35,451][06480] Avg episode reward: [(0, '24.388')] +[2023-02-26 07:35:39,806][30947] Updated weights for policy 0, policy_version 1928 (0.0013) +[2023-02-26 07:35:40,446][06480] Fps is (10 sec: 4506.3, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7897088. Throughput: 0: 964.9. Samples: 972408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:40,451][06480] Avg episode reward: [(0, '24.790')] +[2023-02-26 07:35:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3679.5). Total num frames: 7913472. Throughput: 0: 926.9. Samples: 977674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:35:45,451][06480] Avg episode reward: [(0, '25.930')] +[2023-02-26 07:35:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.5, 300 sec: 3637.8). Total num frames: 7925760. Throughput: 0: 905.6. Samples: 979694. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:35:50,451][06480] Avg episode reward: [(0, '26.518')] +[2023-02-26 07:35:52,592][30947] Updated weights for policy 0, policy_version 1938 (0.0012) +[2023-02-26 07:35:55,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 7950336. Throughput: 0: 932.9. Samples: 985332. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:35:55,449][06480] Avg episode reward: [(0, '27.088')] +[2023-02-26 07:35:55,454][30933] Saving new best policy, reward=27.088! +[2023-02-26 07:36:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 7970816. Throughput: 0: 961.7. Samples: 992210. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:36:00,450][06480] Avg episode reward: [(0, '26.090')] +[2023-02-26 07:36:01,891][30947] Updated weights for policy 0, policy_version 1948 (0.0016) +[2023-02-26 07:36:05,452][06480] Fps is (10 sec: 3684.3, 60 sec: 3686.1, 300 sec: 3665.5). Total num frames: 7987200. Throughput: 0: 941.9. Samples: 994700. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:36:05,454][06480] Avg episode reward: [(0, '26.063')] +[2023-02-26 07:36:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 8003584. Throughput: 0: 905.8. Samples: 998952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:36:10,450][06480] Avg episode reward: [(0, '26.014')] +[2023-02-26 07:36:14,156][30947] Updated weights for policy 0, policy_version 1958 (0.0018) +[2023-02-26 07:36:15,446][06480] Fps is (10 sec: 3688.5, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 8024064. Throughput: 0: 943.3. Samples: 1004992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:36:15,454][06480] Avg episode reward: [(0, '23.998')] +[2023-02-26 07:36:20,446][06480] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 8044544. Throughput: 0: 948.0. Samples: 1008168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:36:20,454][06480] Avg episode reward: [(0, '22.165')] +[2023-02-26 07:36:24,728][30947] Updated weights for policy 0, policy_version 1968 (0.0021) +[2023-02-26 07:36:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 8060928. Throughput: 0: 917.8. Samples: 1013708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:36:25,450][06480] Avg episode reward: [(0, '22.411')] +[2023-02-26 07:36:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3651.7). Total num frames: 8077312. Throughput: 0: 897.2. Samples: 1018048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:36:30,451][06480] Avg episode reward: [(0, '23.138')] +[2023-02-26 07:36:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 8097792. Throughput: 0: 922.0. Samples: 1021182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:36:35,452][06480] Avg episode reward: [(0, '23.771')] +[2023-02-26 07:36:35,927][30947] Updated weights for policy 0, policy_version 1978 (0.0022) +[2023-02-26 07:36:40,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 8122368. Throughput: 0: 951.0. Samples: 1028126. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:36:40,448][06480] Avg episode reward: [(0, '22.902')] +[2023-02-26 07:36:45,447][06480] Fps is (10 sec: 3686.1, 60 sec: 3686.3, 300 sec: 3679.4). Total num frames: 8134656. Throughput: 0: 907.1. Samples: 1033030. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-26 07:36:45,450][06480] Avg episode reward: [(0, '23.697')] +[2023-02-26 07:36:47,301][30947] Updated weights for policy 0, policy_version 1988 (0.0011) +[2023-02-26 07:36:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 8151040. Throughput: 0: 899.5. Samples: 1035174. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:36:50,449][06480] Avg episode reward: [(0, '24.005')] +[2023-02-26 07:36:55,446][06480] Fps is (10 sec: 3686.7, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 8171520. Throughput: 0: 937.2. Samples: 1041128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:36:55,453][06480] Avg episode reward: [(0, '24.759')] +[2023-02-26 07:36:57,318][30947] Updated weights for policy 0, policy_version 1998 (0.0012) +[2023-02-26 07:37:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 8196096. Throughput: 0: 957.1. Samples: 1048062. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:37:00,453][06480] Avg episode reward: [(0, '22.753')] +[2023-02-26 07:37:05,449][06480] Fps is (10 sec: 3685.4, 60 sec: 3686.6, 300 sec: 3707.2). Total num frames: 8208384. Throughput: 0: 936.8. Samples: 1050328. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:37:05,451][06480] Avg episode reward: [(0, '21.891')] +[2023-02-26 07:37:10,447][06480] Fps is (10 sec: 2457.4, 60 sec: 3618.1, 300 sec: 3679.4). Total num frames: 8220672. Throughput: 0: 894.1. Samples: 1053942. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:37:10,452][06480] Avg episode reward: [(0, '21.987')] +[2023-02-26 07:37:10,467][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002007_8220672.pth... +[2023-02-26 07:37:10,681][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001793_7344128.pth +[2023-02-26 07:37:11,280][30947] Updated weights for policy 0, policy_version 2008 (0.0053) +[2023-02-26 07:37:15,446][06480] Fps is (10 sec: 2458.2, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 8232960. Throughput: 0: 880.4. Samples: 1057668. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:37:15,451][06480] Avg episode reward: [(0, '22.591')] +[2023-02-26 07:37:20,446][06480] Fps is (10 sec: 3277.1, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 8253440. Throughput: 0: 869.0. Samples: 1060286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:37:20,451][06480] Avg episode reward: [(0, '23.436')] +[2023-02-26 07:37:22,746][30947] Updated weights for policy 0, policy_version 2018 (0.0011) +[2023-02-26 07:37:25,454][06480] Fps is (10 sec: 3683.6, 60 sec: 3481.2, 300 sec: 3651.6). Total num frames: 8269824. Throughput: 0: 852.7. Samples: 1066506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:37:25,456][06480] Avg episode reward: [(0, '23.560')] +[2023-02-26 07:37:30,446][06480] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 8286208. Throughput: 0: 839.8. Samples: 1070822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:37:30,449][06480] Avg episode reward: [(0, '22.737')] +[2023-02-26 07:37:35,135][30947] Updated weights for policy 0, policy_version 2028 (0.0015) +[2023-02-26 07:37:35,446][06480] Fps is (10 sec: 3689.2, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 8306688. Throughput: 0: 847.2. Samples: 1073296. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:37:35,449][06480] Avg episode reward: [(0, '23.546')] +[2023-02-26 07:37:40,446][06480] Fps is (10 sec: 4505.7, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 8331264. Throughput: 0: 868.7. Samples: 1080218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:37:40,449][06480] Avg episode reward: [(0, '24.514')] +[2023-02-26 07:37:45,120][30947] Updated weights for policy 0, policy_version 2038 (0.0010) +[2023-02-26 07:37:45,448][06480] Fps is (10 sec: 4095.4, 60 sec: 3549.8, 300 sec: 3665.6). Total num frames: 8347648. Throughput: 0: 842.0. Samples: 1085954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:37:45,454][06480] Avg episode reward: [(0, '25.337')] +[2023-02-26 07:37:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 8359936. Throughput: 0: 837.6. Samples: 1088020. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:37:50,452][06480] Avg episode reward: [(0, '26.003')] +[2023-02-26 07:37:55,446][06480] Fps is (10 sec: 3277.3, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 8380416. Throughput: 0: 875.6. Samples: 1093344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:37:55,452][06480] Avg episode reward: [(0, '26.212')] +[2023-02-26 07:37:56,429][30947] Updated weights for policy 0, policy_version 2048 (0.0014) +[2023-02-26 07:38:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 8404992. Throughput: 0: 947.0. Samples: 1100282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:38:00,454][06480] Avg episode reward: [(0, '26.809')] +[2023-02-26 07:38:05,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3550.0, 300 sec: 3651.7). Total num frames: 8421376. Throughput: 0: 953.0. Samples: 1103172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:38:05,449][06480] Avg episode reward: [(0, '25.551')] +[2023-02-26 07:38:07,624][30947] Updated weights for policy 0, policy_version 2058 (0.0032) +[2023-02-26 07:38:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3637.8). Total num frames: 8437760. Throughput: 0: 909.4. Samples: 1107424. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:38:10,453][06480] Avg episode reward: [(0, '26.393')] +[2023-02-26 07:38:15,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 8458240. Throughput: 0: 942.7. Samples: 1113242. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:38:15,448][06480] Avg episode reward: [(0, '26.528')] +[2023-02-26 07:38:18,256][30947] Updated weights for policy 0, policy_version 2068 (0.0021) +[2023-02-26 07:38:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 8478720. Throughput: 0: 962.2. Samples: 1116596. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:38:20,449][06480] Avg episode reward: [(0, '24.914')] +[2023-02-26 07:38:25,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3755.1, 300 sec: 3651.7). Total num frames: 8495104. Throughput: 0: 932.7. Samples: 1122188. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-26 07:38:25,453][06480] Avg episode reward: [(0, '22.969')] +[2023-02-26 07:38:30,362][30947] Updated weights for policy 0, policy_version 2078 (0.0019) +[2023-02-26 07:38:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 8511488. Throughput: 0: 903.0. Samples: 1126586. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:38:30,454][06480] Avg episode reward: [(0, '22.785')] +[2023-02-26 07:38:35,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 8531968. Throughput: 0: 924.6. Samples: 1129626. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:38:35,455][06480] Avg episode reward: [(0, '24.164')] +[2023-02-26 07:38:39,637][30947] Updated weights for policy 0, policy_version 2088 (0.0012) +[2023-02-26 07:38:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 8552448. Throughput: 0: 960.0. Samples: 1136542. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:38:40,448][06480] Avg episode reward: [(0, '22.012')] +[2023-02-26 07:38:45,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3651.7). Total num frames: 8568832. Throughput: 0: 920.3. Samples: 1141696. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:38:45,453][06480] Avg episode reward: [(0, '22.489')] +[2023-02-26 07:38:50,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 8581120. Throughput: 0: 903.4. Samples: 1143824. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:38:50,452][06480] Avg episode reward: [(0, '21.695')] +[2023-02-26 07:38:52,228][30947] Updated weights for policy 0, policy_version 2098 (0.0017) +[2023-02-26 07:38:55,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3624.0). Total num frames: 8605696. Throughput: 0: 934.4. Samples: 1149470. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:38:55,448][06480] Avg episode reward: [(0, '22.063')] +[2023-02-26 07:39:00,446][06480] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 8630272. Throughput: 0: 959.5. Samples: 1156420. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:39:00,454][06480] Avg episode reward: [(0, '21.145')] +[2023-02-26 07:39:01,180][30947] Updated weights for policy 0, policy_version 2108 (0.0020) +[2023-02-26 07:39:05,447][06480] Fps is (10 sec: 4095.8, 60 sec: 3754.6, 300 sec: 3651.7). Total num frames: 8646656. Throughput: 0: 941.7. Samples: 1158972. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:39:05,455][06480] Avg episode reward: [(0, '22.230')] +[2023-02-26 07:39:10,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 8658944. Throughput: 0: 914.7. Samples: 1163348. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:39:10,450][06480] Avg episode reward: [(0, '23.640')] +[2023-02-26 07:39:10,465][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002114_8658944.pth... +[2023-02-26 07:39:10,656][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001899_7778304.pth +[2023-02-26 07:39:13,738][30947] Updated weights for policy 0, policy_version 2118 (0.0019) +[2023-02-26 07:39:15,446][06480] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 8679424. Throughput: 0: 950.8. Samples: 1169374. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:39:15,451][06480] Avg episode reward: [(0, '24.455')] +[2023-02-26 07:39:20,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 8704000. Throughput: 0: 961.3. Samples: 1172886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:39:20,451][06480] Avg episode reward: [(0, '25.518')] +[2023-02-26 07:39:23,917][30947] Updated weights for policy 0, policy_version 2128 (0.0012) +[2023-02-26 07:39:25,447][06480] Fps is (10 sec: 4095.5, 60 sec: 3754.6, 300 sec: 3665.6). Total num frames: 8720384. Throughput: 0: 925.4. Samples: 1178188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:39:25,455][06480] Avg episode reward: [(0, '25.458')] +[2023-02-26 07:39:30,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 8732672. Throughput: 0: 906.8. Samples: 1182500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:39:30,454][06480] Avg episode reward: [(0, '27.245')] +[2023-02-26 07:39:30,466][30933] Saving new best policy, reward=27.245! +[2023-02-26 07:39:35,399][30947] Updated weights for policy 0, policy_version 2138 (0.0017) +[2023-02-26 07:39:35,446][06480] Fps is (10 sec: 3686.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 8757248. Throughput: 0: 932.0. Samples: 1185766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:39:35,449][06480] Avg episode reward: [(0, '27.434')] +[2023-02-26 07:39:35,452][30933] Saving new best policy, reward=27.434! +[2023-02-26 07:39:40,447][06480] Fps is (10 sec: 4505.1, 60 sec: 3754.6, 300 sec: 3693.4). Total num frames: 8777728. Throughput: 0: 958.8. Samples: 1192618. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:39:40,450][06480] Avg episode reward: [(0, '26.986')] +[2023-02-26 07:39:45,446][06480] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 8790016. Throughput: 0: 901.3. Samples: 1196978. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:39:45,449][06480] Avg episode reward: [(0, '27.441')] +[2023-02-26 07:39:45,462][30933] Saving new best policy, reward=27.441! +[2023-02-26 07:39:47,908][30947] Updated weights for policy 0, policy_version 2148 (0.0014) +[2023-02-26 07:39:50,447][06480] Fps is (10 sec: 2457.7, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 8802304. Throughput: 0: 881.8. Samples: 1198652. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:39:50,450][06480] Avg episode reward: [(0, '26.706')] +[2023-02-26 07:39:55,447][06480] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 8814592. Throughput: 0: 861.2. Samples: 1202102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:39:55,453][06480] Avg episode reward: [(0, '25.515')] +[2023-02-26 07:40:00,446][06480] Fps is (10 sec: 3277.0, 60 sec: 3413.3, 300 sec: 3623.9). Total num frames: 8835072. Throughput: 0: 864.6. Samples: 1208282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:40:00,454][06480] Avg episode reward: [(0, '24.726')] +[2023-02-26 07:40:00,505][30947] Updated weights for policy 0, policy_version 2158 (0.0020) +[2023-02-26 07:40:05,446][06480] Fps is (10 sec: 4505.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 8859648. Throughput: 0: 863.3. Samples: 1211736. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:40:05,451][06480] Avg episode reward: [(0, '25.968')] +[2023-02-26 07:40:10,447][06480] Fps is (10 sec: 3686.0, 60 sec: 3549.8, 300 sec: 3637.8). Total num frames: 8871936. Throughput: 0: 859.5. Samples: 1216864. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:40:10,454][06480] Avg episode reward: [(0, '25.906')] +[2023-02-26 07:40:11,908][30947] Updated weights for policy 0, policy_version 2168 (0.0013) +[2023-02-26 07:40:15,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 8888320. Throughput: 0: 865.4. Samples: 1221442. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:40:15,449][06480] Avg episode reward: [(0, '26.557')] +[2023-02-26 07:40:20,446][06480] Fps is (10 sec: 4096.5, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 8912896. Throughput: 0: 868.7. Samples: 1224858. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:40:20,451][06480] Avg episode reward: [(0, '27.134')] +[2023-02-26 07:40:21,819][30947] Updated weights for policy 0, policy_version 2178 (0.0014) +[2023-02-26 07:40:25,454][06480] Fps is (10 sec: 4502.2, 60 sec: 3549.5, 300 sec: 3665.5). Total num frames: 8933376. Throughput: 0: 868.0. Samples: 1231684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:40:25,456][06480] Avg episode reward: [(0, '28.060')] +[2023-02-26 07:40:25,465][30933] Saving new best policy, reward=28.060! +[2023-02-26 07:40:30,452][06480] Fps is (10 sec: 3684.2, 60 sec: 3617.8, 300 sec: 3637.7). Total num frames: 8949760. Throughput: 0: 870.8. Samples: 1236170. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:40:30,464][06480] Avg episode reward: [(0, '27.655')] +[2023-02-26 07:40:34,569][30947] Updated weights for policy 0, policy_version 2188 (0.0015) +[2023-02-26 07:40:35,446][06480] Fps is (10 sec: 2869.4, 60 sec: 3413.3, 300 sec: 3610.0). Total num frames: 8962048. Throughput: 0: 883.1. Samples: 1238390. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:40:35,454][06480] Avg episode reward: [(0, '26.844')] +[2023-02-26 07:40:40,446][06480] Fps is (10 sec: 3688.6, 60 sec: 3481.7, 300 sec: 3637.8). Total num frames: 8986624. Throughput: 0: 946.9. Samples: 1244712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:40:40,448][06480] Avg episode reward: [(0, '27.013')] +[2023-02-26 07:40:43,420][30947] Updated weights for policy 0, policy_version 2198 (0.0019) +[2023-02-26 07:40:45,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9007104. Throughput: 0: 955.3. Samples: 1251272. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:40:45,452][06480] Avg episode reward: [(0, '26.685')] +[2023-02-26 07:40:50,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 9023488. Throughput: 0: 926.4. Samples: 1253422. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:40:50,449][06480] Avg episode reward: [(0, '26.471')] +[2023-02-26 07:40:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 9039872. Throughput: 0: 911.8. Samples: 1257896. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:40:55,452][06480] Avg episode reward: [(0, '26.166')] +[2023-02-26 07:40:55,917][30947] Updated weights for policy 0, policy_version 2208 (0.0019) +[2023-02-26 07:41:00,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3651.8). Total num frames: 9064448. Throughput: 0: 964.4. Samples: 1264840. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:41:00,451][06480] Avg episode reward: [(0, '27.488')] +[2023-02-26 07:41:05,247][30947] Updated weights for policy 0, policy_version 2218 (0.0023) +[2023-02-26 07:41:05,452][06480] Fps is (10 sec: 4503.1, 60 sec: 3754.3, 300 sec: 3665.5). Total num frames: 9084928. Throughput: 0: 964.9. Samples: 1268286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:41:05,454][06480] Avg episode reward: [(0, '27.305')] +[2023-02-26 07:41:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 9097216. Throughput: 0: 917.4. Samples: 1272962. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-26 07:41:10,454][06480] Avg episode reward: [(0, '27.296')] +[2023-02-26 07:41:10,465][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002221_9097216.pth... +[2023-02-26 07:41:10,697][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002007_8220672.pth +[2023-02-26 07:41:15,446][06480] Fps is (10 sec: 2868.8, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 9113600. Throughput: 0: 923.9. Samples: 1277740. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:41:15,449][06480] Avg episode reward: [(0, '26.732')] +[2023-02-26 07:41:17,417][30947] Updated weights for policy 0, policy_version 2228 (0.0014) +[2023-02-26 07:41:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 9138176. Throughput: 0: 951.1. Samples: 1281188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:41:20,449][06480] Avg episode reward: [(0, '25.947')] +[2023-02-26 07:41:25,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3755.1, 300 sec: 3665.6). Total num frames: 9158656. Throughput: 0: 957.6. Samples: 1287802. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-26 07:41:25,454][06480] Avg episode reward: [(0, '26.765')] +[2023-02-26 07:41:28,087][30947] Updated weights for policy 0, policy_version 2238 (0.0011) +[2023-02-26 07:41:30,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.8, 300 sec: 3637.8). Total num frames: 9170944. Throughput: 0: 908.9. Samples: 1292174. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:41:30,453][06480] Avg episode reward: [(0, '25.706')] +[2023-02-26 07:41:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3623.9). Total num frames: 9191424. Throughput: 0: 910.0. Samples: 1294374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:41:35,454][06480] Avg episode reward: [(0, '25.259')] +[2023-02-26 07:41:38,873][30947] Updated weights for policy 0, policy_version 2248 (0.0021) +[2023-02-26 07:41:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 9211904. Throughput: 0: 960.3. Samples: 1301110. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-26 07:41:40,454][06480] Avg episode reward: [(0, '24.425')] +[2023-02-26 07:41:45,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 9232384. Throughput: 0: 942.9. Samples: 1307270. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:41:45,449][06480] Avg episode reward: [(0, '25.970')] +[2023-02-26 07:41:50,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 9244672. Throughput: 0: 913.3. Samples: 1309378. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:41:50,454][06480] Avg episode reward: [(0, '27.058')] +[2023-02-26 07:41:50,673][30947] Updated weights for policy 0, policy_version 2258 (0.0014) +[2023-02-26 07:41:55,446][06480] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 9265152. Throughput: 0: 917.7. Samples: 1314260. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:41:55,454][06480] Avg episode reward: [(0, '25.692')] +[2023-02-26 07:42:00,411][30947] Updated weights for policy 0, policy_version 2268 (0.0012) +[2023-02-26 07:42:00,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 9289728. Throughput: 0: 966.7. Samples: 1321242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:00,448][06480] Avg episode reward: [(0, '26.390')] +[2023-02-26 07:42:05,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3686.7, 300 sec: 3679.5). Total num frames: 9306112. Throughput: 0: 966.9. Samples: 1324700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:42:05,453][06480] Avg episode reward: [(0, '26.578')] +[2023-02-26 07:42:10,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 9322496. Throughput: 0: 916.7. Samples: 1329054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:10,453][06480] Avg episode reward: [(0, '26.682')] +[2023-02-26 07:42:12,807][30947] Updated weights for policy 0, policy_version 2278 (0.0014) +[2023-02-26 07:42:15,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 9338880. Throughput: 0: 937.5. Samples: 1334360. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:15,452][06480] Avg episode reward: [(0, '25.903')] +[2023-02-26 07:42:20,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 9363456. Throughput: 0: 965.4. Samples: 1337816. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:20,448][06480] Avg episode reward: [(0, '26.157')] +[2023-02-26 07:42:22,544][30947] Updated weights for policy 0, policy_version 2288 (0.0013) +[2023-02-26 07:42:25,446][06480] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 9375744. Throughput: 0: 927.7. Samples: 1342856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:42:25,454][06480] Avg episode reward: [(0, '25.773')] +[2023-02-26 07:42:30,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9388032. Throughput: 0: 864.3. Samples: 1346162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:30,449][06480] Avg episode reward: [(0, '27.171')] +[2023-02-26 07:42:35,446][06480] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 9400320. Throughput: 0: 855.3. Samples: 1347866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:42:35,453][06480] Avg episode reward: [(0, '26.982')] +[2023-02-26 07:42:37,994][30947] Updated weights for policy 0, policy_version 2298 (0.0024) +[2023-02-26 07:42:40,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 9420800. Throughput: 0: 872.0. Samples: 1353500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:42:40,452][06480] Avg episode reward: [(0, '27.083')] +[2023-02-26 07:42:45,446][06480] Fps is (10 sec: 4505.5, 60 sec: 3549.8, 300 sec: 3679.5). Total num frames: 9445376. Throughput: 0: 872.6. Samples: 1360510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:42:45,450][06480] Avg episode reward: [(0, '26.640')] +[2023-02-26 07:42:47,241][30947] Updated weights for policy 0, policy_version 2308 (0.0017) +[2023-02-26 07:42:50,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9461760. Throughput: 0: 852.1. Samples: 1363044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:42:50,449][06480] Avg episode reward: [(0, '26.232')] +[2023-02-26 07:42:55,449][06480] Fps is (10 sec: 2866.5, 60 sec: 3481.5, 300 sec: 3623.9). Total num frames: 9474048. Throughput: 0: 852.0. Samples: 1367398. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:42:55,453][06480] Avg episode reward: [(0, '25.788')] +[2023-02-26 07:42:59,181][30947] Updated weights for policy 0, policy_version 2318 (0.0021) +[2023-02-26 07:43:00,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 9498624. Throughput: 0: 871.6. Samples: 1373580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:43:00,449][06480] Avg episode reward: [(0, '26.575')] +[2023-02-26 07:43:05,446][06480] Fps is (10 sec: 4506.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 9519104. Throughput: 0: 871.3. Samples: 1377026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:43:05,452][06480] Avg episode reward: [(0, '25.961')] +[2023-02-26 07:43:09,659][30947] Updated weights for policy 0, policy_version 2328 (0.0027) +[2023-02-26 07:43:10,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 9535488. Throughput: 0: 882.5. Samples: 1382570. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:43:10,449][06480] Avg episode reward: [(0, '27.589')] +[2023-02-26 07:43:10,463][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002328_9535488.pth... +[2023-02-26 07:43:10,668][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002114_8658944.pth +[2023-02-26 07:43:15,447][06480] Fps is (10 sec: 2867.0, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 9547776. Throughput: 0: 904.4. Samples: 1386860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:43:15,451][06480] Avg episode reward: [(0, '27.340')] +[2023-02-26 07:43:20,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 9572352. Throughput: 0: 938.0. Samples: 1390074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:43:20,449][06480] Avg episode reward: [(0, '26.245')] +[2023-02-26 07:43:20,741][30947] Updated weights for policy 0, policy_version 2338 (0.0015) +[2023-02-26 07:43:25,446][06480] Fps is (10 sec: 4915.5, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 9596928. Throughput: 0: 970.2. Samples: 1397158. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:43:25,453][06480] Avg episode reward: [(0, '25.649')] +[2023-02-26 07:43:30,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 9609216. Throughput: 0: 922.4. Samples: 1402016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:43:30,449][06480] Avg episode reward: [(0, '26.424')] +[2023-02-26 07:43:32,122][30947] Updated weights for policy 0, policy_version 2348 (0.0031) +[2023-02-26 07:43:35,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 9625600. Throughput: 0: 914.3. Samples: 1404188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:43:35,450][06480] Avg episode reward: [(0, '26.528')] +[2023-02-26 07:43:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 9650176. Throughput: 0: 950.6. Samples: 1410172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:43:40,449][06480] Avg episode reward: [(0, '24.940')] +[2023-02-26 07:43:42,019][30947] Updated weights for policy 0, policy_version 2358 (0.0018) +[2023-02-26 07:43:45,447][06480] Fps is (10 sec: 4505.0, 60 sec: 3754.6, 300 sec: 3693.3). Total num frames: 9670656. Throughput: 0: 968.3. Samples: 1417154. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:43:45,451][06480] Avg episode reward: [(0, '25.527')] +[2023-02-26 07:43:50,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 9687040. Throughput: 0: 944.6. Samples: 1419534. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:43:50,456][06480] Avg episode reward: [(0, '25.565')] +[2023-02-26 07:43:54,064][30947] Updated weights for policy 0, policy_version 2368 (0.0018) +[2023-02-26 07:43:55,446][06480] Fps is (10 sec: 3277.2, 60 sec: 3823.1, 300 sec: 3637.8). Total num frames: 9703424. Throughput: 0: 918.7. Samples: 1423912. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:43:55,455][06480] Avg episode reward: [(0, '25.331')] +[2023-02-26 07:44:00,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 9723904. Throughput: 0: 963.7. Samples: 1430226. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-26 07:44:00,453][06480] Avg episode reward: [(0, '22.780')] +[2023-02-26 07:44:03,356][30947] Updated weights for policy 0, policy_version 2378 (0.0020) +[2023-02-26 07:44:05,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3693.3). Total num frames: 9748480. Throughput: 0: 970.0. Samples: 1433724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:44:05,451][06480] Avg episode reward: [(0, '22.351')] +[2023-02-26 07:44:10,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 9760768. Throughput: 0: 928.7. Samples: 1438950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:44:10,453][06480] Avg episode reward: [(0, '23.164')] +[2023-02-26 07:44:15,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3823.0, 300 sec: 3637.8). Total num frames: 9777152. Throughput: 0: 918.0. Samples: 1443326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:44:15,453][06480] Avg episode reward: [(0, '22.911')] +[2023-02-26 07:44:16,007][30947] Updated weights for policy 0, policy_version 2388 (0.0018) +[2023-02-26 07:44:20,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 9797632. Throughput: 0: 945.9. Samples: 1446754. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-26 07:44:20,455][06480] Avg episode reward: [(0, '23.117')] +[2023-02-26 07:44:24,909][30947] Updated weights for policy 0, policy_version 2398 (0.0014) +[2023-02-26 07:44:25,446][06480] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 9822208. Throughput: 0: 970.0. Samples: 1453820. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-26 07:44:25,451][06480] Avg episode reward: [(0, '24.133')] +[2023-02-26 07:44:30,446][06480] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 9834496. Throughput: 0: 917.6. Samples: 1458446. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-26 07:44:30,454][06480] Avg episode reward: [(0, '24.458')] +[2023-02-26 07:44:35,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3651.7). Total num frames: 9854976. Throughput: 0: 913.3. Samples: 1460634. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:44:35,448][06480] Avg episode reward: [(0, '24.038')] +[2023-02-26 07:44:37,305][30947] Updated weights for policy 0, policy_version 2408 (0.0031) +[2023-02-26 07:44:40,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 9875456. Throughput: 0: 955.0. Samples: 1466888. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:44:40,449][06480] Avg episode reward: [(0, '25.575')] +[2023-02-26 07:44:45,446][06480] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 9895936. Throughput: 0: 966.7. Samples: 1473726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-26 07:44:45,450][06480] Avg episode reward: [(0, '26.741')] +[2023-02-26 07:44:46,907][30947] Updated weights for policy 0, policy_version 2418 (0.0013) +[2023-02-26 07:44:50,449][06480] Fps is (10 sec: 3685.6, 60 sec: 3754.5, 300 sec: 3721.1). Total num frames: 9912320. Throughput: 0: 937.2. Samples: 1475902. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:44:50,451][06480] Avg episode reward: [(0, '26.634')] +[2023-02-26 07:44:55,446][06480] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 9928704. Throughput: 0: 917.6. Samples: 1480242. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:44:55,454][06480] Avg episode reward: [(0, '25.752')] +[2023-02-26 07:44:59,377][30947] Updated weights for policy 0, policy_version 2428 (0.0024) +[2023-02-26 07:45:00,446][06480] Fps is (10 sec: 3277.6, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 9945088. Throughput: 0: 942.7. Samples: 1485748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:45:00,449][06480] Avg episode reward: [(0, '25.547')] +[2023-02-26 07:45:05,449][06480] Fps is (10 sec: 3275.8, 60 sec: 3549.7, 300 sec: 3693.3). Total num frames: 9961472. Throughput: 0: 915.0. Samples: 1487930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-26 07:45:05,453][06480] Avg episode reward: [(0, '25.025')] +[2023-02-26 07:45:10,446][06480] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 9973760. Throughput: 0: 840.3. Samples: 1491634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-26 07:45:10,449][06480] Avg episode reward: [(0, '24.159')] +[2023-02-26 07:45:10,475][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002435_9973760.pth... +[2023-02-26 07:45:10,704][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002221_9097216.pth +[2023-02-26 07:45:14,509][30947] Updated weights for policy 0, policy_version 2438 (0.0024) +[2023-02-26 07:45:15,446][06480] Fps is (10 sec: 2458.3, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 9986048. Throughput: 0: 832.8. Samples: 1495922. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-26 07:45:15,449][06480] Avg episode reward: [(0, '23.011')] +[2023-02-26 07:45:19,186][30933] Stopping Batcher_0... +[2023-02-26 07:45:19,187][30933] Loop batcher_evt_loop terminating... +[2023-02-26 07:45:19,188][06480] Component Batcher_0 stopped! +[2023-02-26 07:45:19,192][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-26 07:45:19,255][30947] Weights refcount: 2 0 +[2023-02-26 07:45:19,294][30947] Stopping InferenceWorker_p0-w0... +[2023-02-26 07:45:19,298][30947] Loop inference_proc0-0_evt_loop terminating... +[2023-02-26 07:45:19,294][06480] Component InferenceWorker_p0-w0 stopped! +[2023-02-26 07:45:19,330][30933] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002328_9535488.pth +[2023-02-26 07:45:19,341][30933] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-26 07:45:19,451][30933] Stopping LearnerWorker_p0... +[2023-02-26 07:45:19,452][30933] Loop learner_proc0_evt_loop terminating... +[2023-02-26 07:45:19,453][06480] Component LearnerWorker_p0 stopped! +[2023-02-26 07:45:19,557][30962] Stopping RolloutWorker_w4... +[2023-02-26 07:45:19,562][30962] Loop rollout_proc4_evt_loop terminating... +[2023-02-26 07:45:19,558][06480] Component RolloutWorker_w7 stopped! +[2023-02-26 07:45:19,566][06480] Component RolloutWorker_w4 stopped! +[2023-02-26 07:45:19,574][06480] Component RolloutWorker_w1 stopped! +[2023-02-26 07:45:19,575][30952] Stopping RolloutWorker_w1... +[2023-02-26 07:45:19,576][30952] Loop rollout_proc1_evt_loop terminating... +[2023-02-26 07:45:19,554][30968] Stopping RolloutWorker_w7... +[2023-02-26 07:45:19,582][30958] Stopping RolloutWorker_w3... +[2023-02-26 07:45:19,583][30960] Stopping RolloutWorker_w5... +[2023-02-26 07:45:19,583][30960] Loop rollout_proc5_evt_loop terminating... +[2023-02-26 07:45:19,584][30958] Loop rollout_proc3_evt_loop terminating... +[2023-02-26 07:45:19,584][30968] Loop rollout_proc7_evt_loop terminating... +[2023-02-26 07:45:19,582][06480] Component RolloutWorker_w3 stopped! +[2023-02-26 07:45:19,585][06480] Component RolloutWorker_w5 stopped! +[2023-02-26 07:45:19,592][30970] Stopping RolloutWorker_w6... +[2023-02-26 07:45:19,592][30970] Loop rollout_proc6_evt_loop terminating... +[2023-02-26 07:45:19,596][06480] Component RolloutWorker_w6 stopped! +[2023-02-26 07:45:19,616][30951] Stopping RolloutWorker_w2... +[2023-02-26 07:45:19,616][06480] Component RolloutWorker_w2 stopped! +[2023-02-26 07:45:19,617][30951] Loop rollout_proc2_evt_loop terminating... +[2023-02-26 07:45:19,650][06480] Component RolloutWorker_w0 stopped! +[2023-02-26 07:45:19,653][06480] Waiting for process learner_proc0 to stop... +[2023-02-26 07:45:19,658][30948] Stopping RolloutWorker_w0... +[2023-02-26 07:45:19,664][30948] Loop rollout_proc0_evt_loop terminating... +[2023-02-26 07:45:23,164][06480] Waiting for process inference_proc0-0 to join... +[2023-02-26 07:45:23,191][06480] Waiting for process rollout_proc0 to join... +[2023-02-26 07:45:23,193][06480] Waiting for process rollout_proc1 to join... +[2023-02-26 07:45:23,195][06480] Waiting for process rollout_proc2 to join... +[2023-02-26 07:45:23,200][06480] Waiting for process rollout_proc3 to join... +[2023-02-26 07:45:23,203][06480] Waiting for process rollout_proc4 to join... +[2023-02-26 07:45:23,204][06480] Waiting for process rollout_proc5 to join... +[2023-02-26 07:45:23,205][06480] Waiting for process rollout_proc6 to join... +[2023-02-26 07:45:23,206][06480] Waiting for process rollout_proc7 to join... +[2023-02-26 07:45:23,208][06480] Batcher 0 profile tree view: +batching: 38.0811, releasing_batches: 0.0430 +[2023-02-26 07:45:23,209][06480] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 848.1106 +update_model: 11.1522 + weight_update: 0.0023 +one_step: 0.0032 + handle_policy_step: 748.4622 + deserialize: 21.9104, stack: 4.3503, obs_to_device_normalize: 169.2788, forward: 355.3837, send_messages: 39.5183 + prepare_outputs: 120.6791 + to_cpu: 74.4310 +[2023-02-26 07:45:23,210][06480] Learner 0 profile tree view: +misc: 0.0107, prepare_batch: 23.4033 +train: 117.9822 + epoch_init: 0.0165, minibatch_init: 0.0092, losses_postprocess: 0.8250, kl_divergence: 0.8165, after_optimizer: 4.6216 + calculate_losses: 39.1543 + losses_init: 0.0050, forward_head: 2.7332, bptt_initial: 25.6047, tail: 1.5578, advantages_returns: 0.4931, losses: 4.8133 + bptt: 3.4172 + bptt_forward_core: 3.2826 + update: 71.4942 + clip: 2.0844 +[2023-02-26 07:45:23,212][06480] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.6495, enqueue_policy_requests: 240.7376, env_step: 1244.5267, overhead: 33.3235, complete_rollouts: 10.1732 +save_policy_outputs: 32.8304 + split_output_tensors: 15.9062 +[2023-02-26 07:45:23,213][06480] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4744, enqueue_policy_requests: 239.4893, env_step: 1244.9929, overhead: 32.5271, complete_rollouts: 11.3433 +save_policy_outputs: 31.0280 + split_output_tensors: 15.2532 +[2023-02-26 07:45:23,215][06480] Loop Runner_EvtLoop terminating... +[2023-02-26 07:45:23,216][06480] Runner profile tree view: +main_loop: 1691.0690 +[2023-02-26 07:45:23,218][06480] Collected {0: 10006528}, FPS: 3548.4 +[2023-02-26 07:57:00,951][06480] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-26 07:57:00,953][06480] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-26 07:57:00,955][06480] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-26 07:57:00,958][06480] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-26 07:57:00,961][06480] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-26 07:57:00,962][06480] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-26 07:57:00,965][06480] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-26 07:57:00,967][06480] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-26 07:57:00,969][06480] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-26 07:57:00,970][06480] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-26 07:57:00,974][06480] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-26 07:57:00,975][06480] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-26 07:57:00,976][06480] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-26 07:57:00,978][06480] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-26 07:57:00,979][06480] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-26 07:57:01,011][06480] RunningMeanStd input shape: (3, 72, 128) +[2023-02-26 07:57:01,020][06480] RunningMeanStd input shape: (1,) +[2023-02-26 07:57:01,049][06480] ConvEncoder: input_channels=3 +[2023-02-26 07:57:01,210][06480] Conv encoder output size: 512 +[2023-02-26 07:57:01,212][06480] Policy head output size: 512 +[2023-02-26 07:57:01,330][06480] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-26 07:57:02,310][06480] Num frames 100... +[2023-02-26 07:57:02,426][06480] Num frames 200... +[2023-02-26 07:57:02,543][06480] Num frames 300... +[2023-02-26 07:57:02,666][06480] Num frames 400... +[2023-02-26 07:57:02,779][06480] Num frames 500... +[2023-02-26 07:57:02,898][06480] Num frames 600... +[2023-02-26 07:57:03,016][06480] Avg episode rewards: #0: 13.400, true rewards: #0: 6.400 +[2023-02-26 07:57:03,020][06480] Avg episode reward: 13.400, avg true_objective: 6.400 +[2023-02-26 07:57:03,092][06480] Num frames 700... +[2023-02-26 07:57:03,205][06480] Num frames 800... +[2023-02-26 07:57:03,325][06480] Num frames 900... +[2023-02-26 07:57:03,439][06480] Num frames 1000... +[2023-02-26 07:57:03,558][06480] Num frames 1100... +[2023-02-26 07:57:03,672][06480] Num frames 1200... +[2023-02-26 07:57:03,784][06480] Num frames 1300... +[2023-02-26 07:57:03,898][06480] Num frames 1400... +[2023-02-26 07:57:04,013][06480] Num frames 1500... +[2023-02-26 07:57:04,128][06480] Num frames 1600... +[2023-02-26 07:57:04,260][06480] Avg episode rewards: #0: 17.820, true rewards: #0: 8.320 +[2023-02-26 07:57:04,261][06480] Avg episode reward: 17.820, avg true_objective: 8.320 +[2023-02-26 07:57:04,315][06480] Num frames 1700... +[2023-02-26 07:57:04,432][06480] Num frames 1800... +[2023-02-26 07:57:04,561][06480] Num frames 1900... +[2023-02-26 07:57:04,677][06480] Num frames 2000... +[2023-02-26 07:57:04,800][06480] Num frames 2100... +[2023-02-26 07:57:04,915][06480] Num frames 2200... +[2023-02-26 07:57:05,031][06480] Num frames 2300... +[2023-02-26 07:57:05,145][06480] Num frames 2400... +[2023-02-26 07:57:05,264][06480] Num frames 2500... +[2023-02-26 07:57:05,394][06480] Num frames 2600... +[2023-02-26 07:57:05,524][06480] Num frames 2700... +[2023-02-26 07:57:05,641][06480] Num frames 2800... +[2023-02-26 07:57:05,764][06480] Num frames 2900... +[2023-02-26 07:57:05,889][06480] Num frames 3000... +[2023-02-26 07:57:06,008][06480] Num frames 3100... +[2023-02-26 07:57:06,129][06480] Num frames 3200... +[2023-02-26 07:57:06,254][06480] Avg episode rewards: #0: 24.200, true rewards: #0: 10.867 +[2023-02-26 07:57:06,256][06480] Avg episode reward: 24.200, avg true_objective: 10.867 +[2023-02-26 07:57:06,309][06480] Num frames 3300... +[2023-02-26 07:57:06,425][06480] Num frames 3400... +[2023-02-26 07:57:06,545][06480] Num frames 3500... +[2023-02-26 07:57:06,658][06480] Num frames 3600... +[2023-02-26 07:57:06,772][06480] Num frames 3700... +[2023-02-26 07:57:06,893][06480] Num frames 3800... +[2023-02-26 07:57:07,009][06480] Num frames 3900... +[2023-02-26 07:57:07,122][06480] Num frames 4000... +[2023-02-26 07:57:07,241][06480] Num frames 4100... +[2023-02-26 07:57:07,361][06480] Num frames 4200... +[2023-02-26 07:57:07,475][06480] Num frames 4300... +[2023-02-26 07:57:07,609][06480] Num frames 4400... +[2023-02-26 07:57:07,723][06480] Num frames 4500... +[2023-02-26 07:57:07,845][06480] Num frames 4600... +[2023-02-26 07:57:07,960][06480] Num frames 4700... +[2023-02-26 07:57:08,039][06480] Avg episode rewards: #0: 28.532, true rewards: #0: 11.782 +[2023-02-26 07:57:08,041][06480] Avg episode reward: 28.532, avg true_objective: 11.782 +[2023-02-26 07:57:08,143][06480] Num frames 4800... +[2023-02-26 07:57:08,259][06480] Num frames 4900... +[2023-02-26 07:57:08,376][06480] Num frames 5000... +[2023-02-26 07:57:08,504][06480] Avg episode rewards: #0: 23.730, true rewards: #0: 10.130 +[2023-02-26 07:57:08,507][06480] Avg episode reward: 23.730, avg true_objective: 10.130 +[2023-02-26 07:57:08,550][06480] Num frames 5100... +[2023-02-26 07:57:08,674][06480] Num frames 5200... +[2023-02-26 07:57:08,791][06480] Num frames 5300... +[2023-02-26 07:57:08,915][06480] Num frames 5400... +[2023-02-26 07:57:09,030][06480] Num frames 5500... +[2023-02-26 07:57:09,158][06480] Num frames 5600... +[2023-02-26 07:57:09,273][06480] Num frames 5700... +[2023-02-26 07:57:09,397][06480] Num frames 5800... +[2023-02-26 07:57:09,511][06480] Num frames 5900... +[2023-02-26 07:57:09,632][06480] Num frames 6000... +[2023-02-26 07:57:09,790][06480] Avg episode rewards: #0: 24.977, true rewards: #0: 10.143 +[2023-02-26 07:57:09,793][06480] Avg episode reward: 24.977, avg true_objective: 10.143 +[2023-02-26 07:57:09,813][06480] Num frames 6100... +[2023-02-26 07:57:09,950][06480] Num frames 6200... +[2023-02-26 07:57:10,091][06480] Num frames 6300... +[2023-02-26 07:57:10,253][06480] Num frames 6400... +[2023-02-26 07:57:10,420][06480] Num frames 6500... +[2023-02-26 07:57:10,573][06480] Num frames 6600... +[2023-02-26 07:57:10,731][06480] Num frames 6700... +[2023-02-26 07:57:10,884][06480] Avg episode rewards: #0: 23.083, true rewards: #0: 9.654 +[2023-02-26 07:57:10,887][06480] Avg episode reward: 23.083, avg true_objective: 9.654 +[2023-02-26 07:57:10,955][06480] Num frames 6800... +[2023-02-26 07:57:11,126][06480] Num frames 6900... +[2023-02-26 07:57:11,291][06480] Num frames 7000... +[2023-02-26 07:57:11,451][06480] Num frames 7100... +[2023-02-26 07:57:11,603][06480] Num frames 7200... +[2023-02-26 07:57:11,778][06480] Avg episode rewards: #0: 21.462, true rewards: #0: 9.087 +[2023-02-26 07:57:11,780][06480] Avg episode reward: 21.462, avg true_objective: 9.087 +[2023-02-26 07:57:11,835][06480] Num frames 7300... +[2023-02-26 07:57:12,003][06480] Num frames 7400... +[2023-02-26 07:57:12,172][06480] Num frames 7500... +[2023-02-26 07:57:12,333][06480] Num frames 7600... +[2023-02-26 07:57:12,494][06480] Num frames 7700... +[2023-02-26 07:57:12,656][06480] Num frames 7800... +[2023-02-26 07:57:12,821][06480] Num frames 7900... +[2023-02-26 07:57:12,977][06480] Num frames 8000... +[2023-02-26 07:57:13,138][06480] Num frames 8100... +[2023-02-26 07:57:13,303][06480] Num frames 8200... +[2023-02-26 07:57:13,467][06480] Num frames 8300... +[2023-02-26 07:57:13,633][06480] Num frames 8400... +[2023-02-26 07:57:13,751][06480] Num frames 8500... +[2023-02-26 07:57:13,867][06480] Num frames 8600... +[2023-02-26 07:57:13,981][06480] Num frames 8700... +[2023-02-26 07:57:14,095][06480] Num frames 8800... +[2023-02-26 07:57:14,206][06480] Num frames 8900... +[2023-02-26 07:57:14,324][06480] Num frames 9000... +[2023-02-26 07:57:14,442][06480] Num frames 9100... +[2023-02-26 07:57:14,553][06480] Num frames 9200... +[2023-02-26 07:57:14,666][06480] Num frames 9300... +[2023-02-26 07:57:14,808][06480] Avg episode rewards: #0: 25.522, true rewards: #0: 10.411 +[2023-02-26 07:57:14,810][06480] Avg episode reward: 25.522, avg true_objective: 10.411 +[2023-02-26 07:57:14,849][06480] Num frames 9400... +[2023-02-26 07:57:14,964][06480] Num frames 9500... +[2023-02-26 07:57:15,081][06480] Num frames 9600... +[2023-02-26 07:57:15,190][06480] Num frames 9700... +[2023-02-26 07:57:15,316][06480] Num frames 9800... +[2023-02-26 07:57:15,432][06480] Num frames 9900... +[2023-02-26 07:57:15,539][06480] Num frames 10000... +[2023-02-26 07:57:15,648][06480] Num frames 10100... +[2023-02-26 07:57:15,750][06480] Avg episode rewards: #0: 24.438, true rewards: #0: 10.138 +[2023-02-26 07:57:15,754][06480] Avg episode reward: 24.438, avg true_objective: 10.138 +[2023-02-26 07:58:17,864][06480] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-26 07:58:32,539][06480] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-26 07:58:32,541][06480] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-26 07:58:32,543][06480] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-26 07:58:32,545][06480] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-26 07:58:32,548][06480] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-26 07:58:32,550][06480] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-26 07:58:32,552][06480] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-26 07:58:32,553][06480] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-26 07:58:32,555][06480] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-26 07:58:32,557][06480] Adding new argument 'hf_repository'='sd99/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-26 07:58:32,558][06480] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-26 07:58:32,560][06480] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-26 07:58:32,562][06480] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-26 07:58:32,563][06480] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-26 07:58:32,565][06480] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-26 07:58:32,587][06480] RunningMeanStd input shape: (3, 72, 128) +[2023-02-26 07:58:32,591][06480] RunningMeanStd input shape: (1,) +[2023-02-26 07:58:32,606][06480] ConvEncoder: input_channels=3 +[2023-02-26 07:58:32,641][06480] Conv encoder output size: 512 +[2023-02-26 07:58:32,643][06480] Policy head output size: 512 +[2023-02-26 07:58:32,662][06480] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-26 07:58:33,110][06480] Num frames 100... +[2023-02-26 07:58:33,232][06480] Num frames 200... +[2023-02-26 07:58:33,351][06480] Num frames 300... +[2023-02-26 07:58:33,461][06480] Num frames 400... +[2023-02-26 07:58:33,575][06480] Num frames 500... +[2023-02-26 07:58:33,695][06480] Num frames 600... +[2023-02-26 07:58:33,761][06480] Avg episode rewards: #0: 9.080, true rewards: #0: 6.080 +[2023-02-26 07:58:33,764][06480] Avg episode reward: 9.080, avg true_objective: 6.080 +[2023-02-26 07:58:33,877][06480] Num frames 700... +[2023-02-26 07:58:33,997][06480] Num frames 800... +[2023-02-26 07:58:34,108][06480] Num frames 900... +[2023-02-26 07:58:34,223][06480] Num frames 1000... +[2023-02-26 07:58:34,343][06480] Num frames 1100... +[2023-02-26 07:58:34,459][06480] Num frames 1200... +[2023-02-26 07:58:34,582][06480] Num frames 1300... +[2023-02-26 07:58:34,705][06480] Num frames 1400... +[2023-02-26 07:58:34,827][06480] Num frames 1500... +[2023-02-26 07:58:34,947][06480] Num frames 1600... +[2023-02-26 07:58:35,111][06480] Avg episode rewards: #0: 17.320, true rewards: #0: 8.320 +[2023-02-26 07:58:35,114][06480] Avg episode reward: 17.320, avg true_objective: 8.320 +[2023-02-26 07:58:35,177][06480] Num frames 1700... +[2023-02-26 07:58:35,346][06480] Num frames 1800... +[2023-02-26 07:58:35,503][06480] Num frames 1900... +[2023-02-26 07:58:35,659][06480] Num frames 2000... +[2023-02-26 07:58:35,820][06480] Num frames 2100... +[2023-02-26 07:58:35,985][06480] Num frames 2200... +[2023-02-26 07:58:36,146][06480] Num frames 2300... +[2023-02-26 07:58:36,307][06480] Num frames 2400... +[2023-02-26 07:58:36,471][06480] Num frames 2500... +[2023-02-26 07:58:36,636][06480] Num frames 2600... +[2023-02-26 07:58:36,800][06480] Num frames 2700... +[2023-02-26 07:58:36,971][06480] Num frames 2800... +[2023-02-26 07:58:37,140][06480] Num frames 2900... +[2023-02-26 07:58:37,287][06480] Avg episode rewards: #0: 21.504, true rewards: #0: 9.837 +[2023-02-26 07:58:37,290][06480] Avg episode reward: 21.504, avg true_objective: 9.837 +[2023-02-26 07:58:37,377][06480] Num frames 3000... +[2023-02-26 07:58:37,545][06480] Num frames 3100... +[2023-02-26 07:58:37,706][06480] Num frames 3200... +[2023-02-26 07:58:37,875][06480] Num frames 3300... +[2023-02-26 07:58:38,048][06480] Num frames 3400... +[2023-02-26 07:58:38,216][06480] Num frames 3500... +[2023-02-26 07:58:38,388][06480] Num frames 3600... +[2023-02-26 07:58:38,532][06480] Num frames 3700... +[2023-02-26 07:58:38,647][06480] Num frames 3800... +[2023-02-26 07:58:38,762][06480] Num frames 3900... +[2023-02-26 07:58:38,872][06480] Num frames 4000... +[2023-02-26 07:58:38,996][06480] Num frames 4100... +[2023-02-26 07:58:39,119][06480] Num frames 4200... +[2023-02-26 07:58:39,237][06480] Num frames 4300... +[2023-02-26 07:58:39,361][06480] Avg episode rewards: #0: 24.398, true rewards: #0: 10.897 +[2023-02-26 07:58:39,363][06480] Avg episode reward: 24.398, avg true_objective: 10.897 +[2023-02-26 07:58:39,413][06480] Num frames 4400... +[2023-02-26 07:58:39,526][06480] Num frames 4500... +[2023-02-26 07:58:39,646][06480] Num frames 4600... +[2023-02-26 07:58:39,758][06480] Num frames 4700... +[2023-02-26 07:58:39,876][06480] Num frames 4800... +[2023-02-26 07:58:40,011][06480] Num frames 4900... +[2023-02-26 07:58:40,123][06480] Num frames 5000... +[2023-02-26 07:58:40,238][06480] Num frames 5100... +[2023-02-26 07:58:40,355][06480] Num frames 5200... +[2023-02-26 07:58:40,467][06480] Num frames 5300... +[2023-02-26 07:58:40,579][06480] Num frames 5400... +[2023-02-26 07:58:40,695][06480] Num frames 5500... +[2023-02-26 07:58:40,814][06480] Num frames 5600... +[2023-02-26 07:58:40,968][06480] Avg episode rewards: #0: 26.380, true rewards: #0: 11.380 +[2023-02-26 07:58:40,970][06480] Avg episode reward: 26.380, avg true_objective: 11.380 +[2023-02-26 07:58:40,986][06480] Num frames 5700... +[2023-02-26 07:58:41,095][06480] Num frames 5800... +[2023-02-26 07:58:41,211][06480] Num frames 5900... +[2023-02-26 07:58:41,331][06480] Num frames 6000... +[2023-02-26 07:58:41,455][06480] Num frames 6100... +[2023-02-26 07:58:41,587][06480] Num frames 6200... +[2023-02-26 07:58:41,710][06480] Num frames 6300... +[2023-02-26 07:58:41,836][06480] Num frames 6400... +[2023-02-26 07:58:41,958][06480] Num frames 6500... +[2023-02-26 07:58:42,086][06480] Num frames 6600... +[2023-02-26 07:58:42,204][06480] Num frames 6700... +[2023-02-26 07:58:42,337][06480] Num frames 6800... +[2023-02-26 07:58:42,456][06480] Num frames 6900... +[2023-02-26 07:58:42,574][06480] Num frames 7000... +[2023-02-26 07:58:42,674][06480] Avg episode rewards: #0: 26.887, true rewards: #0: 11.720 +[2023-02-26 07:58:42,677][06480] Avg episode reward: 26.887, avg true_objective: 11.720 +[2023-02-26 07:58:42,766][06480] Num frames 7100... +[2023-02-26 07:58:42,879][06480] Num frames 7200... +[2023-02-26 07:58:42,998][06480] Num frames 7300... +[2023-02-26 07:58:43,108][06480] Num frames 7400... +[2023-02-26 07:58:43,232][06480] Num frames 7500... +[2023-02-26 07:58:43,347][06480] Num frames 7600... +[2023-02-26 07:58:43,457][06480] Num frames 7700... +[2023-02-26 07:58:43,569][06480] Num frames 7800... +[2023-02-26 07:58:43,679][06480] Num frames 7900... +[2023-02-26 07:58:43,799][06480] Num frames 8000... +[2023-02-26 07:58:43,911][06480] Num frames 8100... +[2023-02-26 07:58:44,064][06480] Avg episode rewards: #0: 26.834, true rewards: #0: 11.691 +[2023-02-26 07:58:44,066][06480] Avg episode reward: 26.834, avg true_objective: 11.691 +[2023-02-26 07:58:44,087][06480] Num frames 8200... +[2023-02-26 07:58:44,202][06480] Num frames 8300... +[2023-02-26 07:58:44,315][06480] Num frames 8400... +[2023-02-26 07:58:44,429][06480] Num frames 8500... +[2023-02-26 07:58:44,540][06480] Num frames 8600... +[2023-02-26 07:58:44,655][06480] Num frames 8700... +[2023-02-26 07:58:44,767][06480] Num frames 8800... +[2023-02-26 07:58:44,880][06480] Num frames 8900... +[2023-02-26 07:58:45,012][06480] Num frames 9000... +[2023-02-26 07:58:45,132][06480] Num frames 9100... +[2023-02-26 07:58:45,245][06480] Num frames 9200... +[2023-02-26 07:58:45,359][06480] Num frames 9300... +[2023-02-26 07:58:45,479][06480] Num frames 9400... +[2023-02-26 07:58:45,591][06480] Num frames 9500... +[2023-02-26 07:58:45,702][06480] Num frames 9600... +[2023-02-26 07:58:45,822][06480] Num frames 9700... +[2023-02-26 07:58:45,933][06480] Num frames 9800... +[2023-02-26 07:58:46,080][06480] Avg episode rewards: #0: 28.100, true rewards: #0: 12.350 +[2023-02-26 07:58:46,081][06480] Avg episode reward: 28.100, avg true_objective: 12.350 +[2023-02-26 07:58:46,110][06480] Num frames 9900... +[2023-02-26 07:58:46,226][06480] Num frames 10000... +[2023-02-26 07:58:46,340][06480] Num frames 10100... +[2023-02-26 07:58:46,451][06480] Num frames 10200... +[2023-02-26 07:58:46,543][06480] Avg episode rewards: #0: 25.480, true rewards: #0: 11.369 +[2023-02-26 07:58:46,545][06480] Avg episode reward: 25.480, avg true_objective: 11.369 +[2023-02-26 07:58:46,627][06480] Num frames 10300... +[2023-02-26 07:58:46,737][06480] Num frames 10400... +[2023-02-26 07:58:46,856][06480] Num frames 10500... +[2023-02-26 07:58:46,970][06480] Num frames 10600... +[2023-02-26 07:58:47,089][06480] Num frames 10700... +[2023-02-26 07:58:47,218][06480] Num frames 10800... +[2023-02-26 07:58:47,334][06480] Num frames 10900... +[2023-02-26 07:58:47,450][06480] Num frames 11000... +[2023-02-26 07:58:47,563][06480] Num frames 11100... +[2023-02-26 07:58:47,683][06480] Num frames 11200... +[2023-02-26 07:58:47,800][06480] Num frames 11300... +[2023-02-26 07:58:47,921][06480] Num frames 11400... +[2023-02-26 07:58:48,092][06480] Avg episode rewards: #0: 25.995, true rewards: #0: 11.495 +[2023-02-26 07:58:48,095][06480] Avg episode reward: 25.995, avg true_objective: 11.495 +[2023-02-26 07:59:56,374][06480] Replay video saved to /content/train_dir/default_experiment/replay.mp4!