diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,2926 @@ +[2024-10-02 02:24:57,167][00821] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-10-02 02:24:57,172][00821] Rollout worker 0 uses device cpu +[2024-10-02 02:24:57,174][00821] Rollout worker 1 uses device cpu +[2024-10-02 02:24:57,179][00821] Rollout worker 2 uses device cpu +[2024-10-02 02:24:57,182][00821] Rollout worker 3 uses device cpu +[2024-10-02 02:24:57,185][00821] Rollout worker 4 uses device cpu +[2024-10-02 02:24:57,187][00821] Rollout worker 5 uses device cpu +[2024-10-02 02:24:57,194][00821] Rollout worker 6 uses device cpu +[2024-10-02 02:24:57,204][00821] Rollout worker 7 uses device cpu +[2024-10-02 02:30:22,618][00821] Environment doom_basic already registered, overwriting... +[2024-10-02 02:30:22,624][00821] Environment doom_two_colors_easy already registered, overwriting... +[2024-10-02 02:30:22,627][00821] Environment doom_two_colors_hard already registered, overwriting... +[2024-10-02 02:30:22,629][00821] Environment doom_dm already registered, overwriting... +[2024-10-02 02:30:22,631][00821] Environment doom_dwango5 already registered, overwriting... +[2024-10-02 02:30:22,634][00821] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-10-02 02:30:22,636][00821] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-10-02 02:30:22,637][00821] Environment doom_my_way_home already registered, overwriting... +[2024-10-02 02:30:22,639][00821] Environment doom_deadly_corridor already registered, overwriting... +[2024-10-02 02:30:22,641][00821] Environment doom_defend_the_center already registered, overwriting... +[2024-10-02 02:30:22,642][00821] Environment doom_defend_the_line already registered, overwriting... +[2024-10-02 02:30:22,644][00821] Environment doom_health_gathering already registered, overwriting... +[2024-10-02 02:30:22,646][00821] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-10-02 02:30:22,652][00821] Environment doom_battle already registered, overwriting... +[2024-10-02 02:30:22,654][00821] Environment doom_battle2 already registered, overwriting... +[2024-10-02 02:30:22,656][00821] Environment doom_duel_bots already registered, overwriting... +[2024-10-02 02:30:22,657][00821] Environment doom_deathmatch_bots already registered, overwriting... +[2024-10-02 02:30:22,659][00821] Environment doom_duel already registered, overwriting... +[2024-10-02 02:30:22,664][00821] Environment doom_deathmatch_full already registered, overwriting... +[2024-10-02 02:30:22,667][00821] Environment doom_benchmark already registered, overwriting... +[2024-10-02 02:30:22,671][00821] register_encoder_factory: +[2024-10-02 02:30:22,703][00821] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-02 02:30:22,704][00821] Overriding arg 'device' with value 'cpu' passed from command line +[2024-10-02 02:30:22,717][00821] Experiment dir /content/train_dir/default_experiment already exists! +[2024-10-02 02:30:22,719][00821] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-10-02 02:30:22,723][00821] Weights and Biases integration disabled +[2024-10-02 02:30:22,729][00821] Environment var CUDA_VISIBLE_DEVICES is + +[2024-10-02 02:30:25,009][00821] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=cpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=4000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-10-02 02:30:25,012][00821] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-10-02 02:30:25,018][00821] Rollout worker 0 uses device cpu +[2024-10-02 02:30:25,020][00821] Rollout worker 1 uses device cpu +[2024-10-02 02:30:25,023][00821] Rollout worker 2 uses device cpu +[2024-10-02 02:30:25,026][00821] Rollout worker 3 uses device cpu +[2024-10-02 02:30:25,028][00821] Rollout worker 4 uses device cpu +[2024-10-02 02:30:25,030][00821] Rollout worker 5 uses device cpu +[2024-10-02 02:30:25,033][00821] Rollout worker 6 uses device cpu +[2024-10-02 02:30:25,034][00821] Rollout worker 7 uses device cpu +[2024-10-02 02:30:25,208][00821] InferenceWorker_p0-w0: min num requests: 2 +[2024-10-02 02:30:25,258][00821] Starting all processes... +[2024-10-02 02:30:25,260][00821] Starting process learner_proc0 +[2024-10-02 02:30:25,319][00821] Starting all processes... +[2024-10-02 02:30:25,334][00821] Starting process inference_proc0-0 +[2024-10-02 02:30:25,335][00821] Starting process rollout_proc0 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc1 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc2 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc3 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc4 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc5 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc6 +[2024-10-02 02:30:25,336][00821] Starting process rollout_proc7 +[2024-10-02 02:30:41,622][04725] Starting seed is not provided +[2024-10-02 02:30:41,624][04725] Initializing actor-critic model on device cpu +[2024-10-02 02:30:41,625][04725] RunningMeanStd input shape: (3, 72, 128) +[2024-10-02 02:30:41,627][04725] RunningMeanStd input shape: (1,) +[2024-10-02 02:30:41,727][04725] ConvEncoder: input_channels=3 +[2024-10-02 02:30:41,935][04745] Worker 6 uses CPU cores [0] +[2024-10-02 02:30:42,020][04743] Worker 3 uses CPU cores [1] +[2024-10-02 02:30:42,205][04742] Worker 4 uses CPU cores [0] +[2024-10-02 02:30:42,233][04738] Worker 0 uses CPU cores [0] +[2024-10-02 02:30:42,277][04741] Worker 2 uses CPU cores [0] +[2024-10-02 02:30:42,297][04740] Worker 1 uses CPU cores [1] +[2024-10-02 02:30:42,344][04746] Worker 7 uses CPU cores [1] +[2024-10-02 02:30:42,385][04744] Worker 5 uses CPU cores [1] +[2024-10-02 02:30:42,406][04725] Conv encoder output size: 512 +[2024-10-02 02:30:42,407][04725] Policy head output size: 512 +[2024-10-02 02:30:42,432][04725] Created Actor Critic model with architecture: +[2024-10-02 02:30:42,432][04725] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-10-02 02:30:43,043][04725] Using optimizer +[2024-10-02 02:30:43,046][04725] No checkpoints found +[2024-10-02 02:30:43,046][04725] Did not load from checkpoint, starting from scratch! +[2024-10-02 02:30:43,046][04725] Initialized policy 0 weights for model version 0 +[2024-10-02 02:30:43,050][04725] LearnerWorker_p0 finished initialization! +[2024-10-02 02:30:43,056][04739] RunningMeanStd input shape: (3, 72, 128) +[2024-10-02 02:30:43,059][04739] RunningMeanStd input shape: (1,) +[2024-10-02 02:30:43,095][04739] ConvEncoder: input_channels=3 +[2024-10-02 02:30:43,296][04739] Conv encoder output size: 512 +[2024-10-02 02:30:43,296][04739] Policy head output size: 512 +[2024-10-02 02:30:43,327][00821] Inference worker 0-0 is ready! +[2024-10-02 02:30:43,328][00821] All inference workers are ready! Signal rollout workers to start! +[2024-10-02 02:30:43,488][04746] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,482][04744] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,494][04740] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,496][04743] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,502][04738] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,499][04745] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,504][04742] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:43,505][04741] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 02:30:45,198][00821] Heartbeat connected on Batcher_0 +[2024-10-02 02:30:45,207][00821] Heartbeat connected on LearnerWorker_p0 +[2024-10-02 02:30:45,260][00821] Heartbeat connected on InferenceWorker_p0-w0 +[2024-10-02 02:30:45,870][04743] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,872][04746] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,873][04740] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,874][04744] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,872][04741] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,873][04745] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,870][04742] Decorrelating experience for 0 frames... +[2024-10-02 02:30:45,872][04738] Decorrelating experience for 0 frames... +[2024-10-02 02:30:47,448][04742] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,457][04745] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,463][04738] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,582][04743] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,578][04746] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,601][04744] Decorrelating experience for 32 frames... +[2024-10-02 02:30:47,736][00821] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-02 02:30:49,582][04741] Decorrelating experience for 32 frames... +[2024-10-02 02:30:49,874][04742] Decorrelating experience for 64 frames... +[2024-10-02 02:30:49,899][04745] Decorrelating experience for 64 frames... +[2024-10-02 02:30:50,546][04744] Decorrelating experience for 64 frames... +[2024-10-02 02:30:50,555][04746] Decorrelating experience for 64 frames... +[2024-10-02 02:30:50,577][04743] Decorrelating experience for 64 frames... +[2024-10-02 02:30:51,829][04740] Decorrelating experience for 32 frames... +[2024-10-02 02:30:52,729][00821] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-02 02:30:52,750][04744] Decorrelating experience for 96 frames... +[2024-10-02 02:30:52,771][04743] Decorrelating experience for 96 frames... +[2024-10-02 02:30:53,067][04741] Decorrelating experience for 64 frames... +[2024-10-02 02:30:53,072][04738] Decorrelating experience for 64 frames... +[2024-10-02 02:30:53,450][04742] Decorrelating experience for 96 frames... +[2024-10-02 02:30:53,462][04745] Decorrelating experience for 96 frames... +[2024-10-02 02:30:53,579][00821] Heartbeat connected on RolloutWorker_w5 +[2024-10-02 02:30:53,615][00821] Heartbeat connected on RolloutWorker_w3 +[2024-10-02 02:30:54,120][00821] Heartbeat connected on RolloutWorker_w4 +[2024-10-02 02:30:54,133][00821] Heartbeat connected on RolloutWorker_w6 +[2024-10-02 02:30:55,791][04738] Decorrelating experience for 96 frames... +[2024-10-02 02:30:56,007][04740] Decorrelating experience for 64 frames... +[2024-10-02 02:30:56,094][00821] Heartbeat connected on RolloutWorker_w0 +[2024-10-02 02:30:57,337][04741] Decorrelating experience for 96 frames... +[2024-10-02 02:30:57,691][00821] Heartbeat connected on RolloutWorker_w2 +[2024-10-02 02:30:57,731][00821] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 41.4. Samples: 414. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-02 02:30:57,737][00821] Avg episode reward: [(0, '1.862')] +[2024-10-02 02:30:58,957][04746] Decorrelating experience for 96 frames... +[2024-10-02 02:30:59,377][04740] Decorrelating experience for 96 frames... +[2024-10-02 02:30:59,688][00821] Heartbeat connected on RolloutWorker_w7 +[2024-10-02 02:31:00,001][00821] Heartbeat connected on RolloutWorker_w1 +[2024-10-02 02:31:02,061][04725] Signal inference workers to stop experience collection... +[2024-10-02 02:31:02,112][04739] InferenceWorker_p0-w0: stopping experience collection +[2024-10-02 02:31:02,729][00821] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 162.7. Samples: 2440. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-02 02:31:02,732][00821] Avg episode reward: [(0, '2.813')] +[2024-10-02 02:31:03,487][04725] Signal inference workers to resume experience collection... +[2024-10-02 02:31:03,489][04739] InferenceWorker_p0-w0: resuming experience collection +[2024-10-02 02:31:07,729][00821] Fps is (10 sec: 409.7, 60 sec: 204.9, 300 sec: 204.9). Total num frames: 4096. Throughput: 0: 188.7. Samples: 3772. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-10-02 02:31:07,732][00821] Avg episode reward: [(0, '3.055')] +[2024-10-02 02:31:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 327.8, 300 sec: 327.8). Total num frames: 8192. Throughput: 0: 152.4. Samples: 3808. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2024-10-02 02:31:12,734][00821] Avg episode reward: [(0, '3.123')] +[2024-10-02 02:31:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 409.7, 300 sec: 409.7). Total num frames: 12288. Throughput: 0: 165.1. Samples: 4952. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) +[2024-10-02 02:31:17,732][00821] Avg episode reward: [(0, '3.361')] +[2024-10-02 02:31:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 468.2, 300 sec: 468.2). Total num frames: 16384. Throughput: 0: 191.7. Samples: 6708. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:31:22,736][00821] Avg episode reward: [(0, '3.666')] +[2024-10-02 02:31:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 512.1, 300 sec: 512.1). Total num frames: 20480. Throughput: 0: 185.9. Samples: 7434. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:31:27,736][00821] Avg episode reward: [(0, '3.844')] +[2024-10-02 02:31:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 546.2, 300 sec: 546.2). Total num frames: 24576. Throughput: 0: 184.0. Samples: 8278. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:32,736][00821] Avg episode reward: [(0, '3.884')] +[2024-10-02 02:31:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 573.5, 300 sec: 573.5). Total num frames: 28672. Throughput: 0: 217.3. Samples: 9778. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:37,740][00821] Avg episode reward: [(0, '3.883')] +[2024-10-02 02:31:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 670.3, 300 sec: 670.3). Total num frames: 36864. Throughput: 0: 226.5. Samples: 10608. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:42,740][00821] Avg episode reward: [(0, '3.960')] +[2024-10-02 02:31:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 614.5, 300 sec: 614.5). Total num frames: 36864. Throughput: 0: 207.0. Samples: 11754. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:47,732][00821] Avg episode reward: [(0, '4.116')] +[2024-10-02 02:31:48,241][04739] Updated weights for policy 0, policy_version 10 (0.1606) +[2024-10-02 02:31:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 630.2). Total num frames: 40960. Throughput: 0: 202.3. Samples: 12876. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:52,732][00821] Avg episode reward: [(0, '4.246')] +[2024-10-02 02:31:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 702.2). Total num frames: 49152. Throughput: 0: 221.2. Samples: 13764. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:31:57,736][00821] Avg episode reward: [(0, '4.414')] +[2024-10-02 02:32:02,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 710.0). Total num frames: 53248. Throughput: 0: 222.9. Samples: 14982. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:32:02,737][00821] Avg episode reward: [(0, '4.493')] +[2024-10-02 02:32:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 716.9). Total num frames: 57344. Throughput: 0: 205.7. Samples: 15964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:32:07,736][00821] Avg episode reward: [(0, '4.461')] +[2024-10-02 02:32:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 722.9). Total num frames: 61440. Throughput: 0: 206.8. Samples: 16738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:32:12,732][00821] Avg episode reward: [(0, '4.415')] +[2024-10-02 02:32:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 728.2). Total num frames: 65536. Throughput: 0: 218.5. Samples: 18112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:17,734][00821] Avg episode reward: [(0, '4.431')] +[2024-10-02 02:32:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 733.0). Total num frames: 69632. Throughput: 0: 215.7. Samples: 19484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:22,733][00821] Avg episode reward: [(0, '4.411')] +[2024-10-02 02:32:26,159][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000018_73728.pth... +[2024-10-02 02:32:27,733][00821] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 737.3). Total num frames: 73728. Throughput: 0: 207.7. Samples: 19954. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:27,736][00821] Avg episode reward: [(0, '4.411')] +[2024-10-02 02:32:32,730][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 741.2). Total num frames: 77824. Throughput: 0: 207.3. Samples: 21084. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:32,733][00821] Avg episode reward: [(0, '4.598')] +[2024-10-02 02:32:34,712][04739] Updated weights for policy 0, policy_version 20 (0.0522) +[2024-10-02 02:32:37,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 744.8). Total num frames: 81920. Throughput: 0: 224.5. Samples: 22980. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:37,740][00821] Avg episode reward: [(0, '4.602')] +[2024-10-02 02:32:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 748.0). Total num frames: 86016. Throughput: 0: 211.3. Samples: 23274. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:42,732][00821] Avg episode reward: [(0, '4.434')] +[2024-10-02 02:32:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 751.0). Total num frames: 90112. Throughput: 0: 207.6. Samples: 24322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:47,732][00821] Avg episode reward: [(0, '4.480')] +[2024-10-02 02:32:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 753.7). Total num frames: 94208. Throughput: 0: 221.6. Samples: 25936. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:32:52,735][00821] Avg episode reward: [(0, '4.457')] +[2024-10-02 02:32:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 756.2). Total num frames: 98304. Throughput: 0: 218.8. Samples: 26584. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:32:57,735][00821] Avg episode reward: [(0, '4.490')] +[2024-10-02 02:33:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 758.6). Total num frames: 102400. Throughput: 0: 213.1. Samples: 27700. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:33:02,734][00821] Avg episode reward: [(0, '4.405')] +[2024-10-02 02:33:04,246][04725] Saving new best policy, reward=4.405! +[2024-10-02 02:33:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 760.7). Total num frames: 106496. Throughput: 0: 214.1. Samples: 29118. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 02:33:07,735][00821] Avg episode reward: [(0, '4.476')] +[2024-10-02 02:33:12,481][04725] Saving new best policy, reward=4.476! +[2024-10-02 02:33:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 791.0). Total num frames: 114688. Throughput: 0: 216.1. Samples: 29678. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:33:12,734][00821] Avg episode reward: [(0, '4.509')] +[2024-10-02 02:33:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 764.6). Total num frames: 114688. Throughput: 0: 224.0. Samples: 31166. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:33:17,731][00821] Avg episode reward: [(0, '4.581')] +[2024-10-02 02:33:17,936][04725] Saving new best policy, reward=4.509! +[2024-10-02 02:33:22,736][00821] Fps is (10 sec: 409.3, 60 sec: 819.1, 300 sec: 766.3). Total num frames: 118784. Throughput: 0: 205.2. Samples: 32216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:33:22,744][00821] Avg episode reward: [(0, '4.478')] +[2024-10-02 02:33:23,459][04725] Saving new best policy, reward=4.581! +[2024-10-02 02:33:23,469][04739] Updated weights for policy 0, policy_version 30 (0.1031) +[2024-10-02 02:33:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 793.6). Total num frames: 126976. Throughput: 0: 215.7. Samples: 32980. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:33:27,736][00821] Avg episode reward: [(0, '4.560')] +[2024-10-02 02:33:32,734][00821] Fps is (10 sec: 1229.0, 60 sec: 887.4, 300 sec: 794.4). Total num frames: 131072. Throughput: 0: 221.2. Samples: 34278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:33:32,739][00821] Avg episode reward: [(0, '4.626')] +[2024-10-02 02:33:37,356][04725] Saving new best policy, reward=4.626! +[2024-10-02 02:33:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 795.1). Total num frames: 135168. Throughput: 0: 208.0. Samples: 35298. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:33:37,737][00821] Avg episode reward: [(0, '4.729')] +[2024-10-02 02:33:42,569][04725] Saving new best policy, reward=4.729! +[2024-10-02 02:33:42,729][00821] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 795.8). Total num frames: 139264. Throughput: 0: 208.2. Samples: 35952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:33:42,733][00821] Avg episode reward: [(0, '4.676')] +[2024-10-02 02:33:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 796.5). Total num frames: 143360. Throughput: 0: 215.1. Samples: 37380. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:33:47,732][00821] Avg episode reward: [(0, '4.732')] +[2024-10-02 02:33:50,781][04725] Saving new best policy, reward=4.732! +[2024-10-02 02:33:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 797.1). Total num frames: 147456. Throughput: 0: 214.7. Samples: 38780. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:33:52,741][00821] Avg episode reward: [(0, '4.807')] +[2024-10-02 02:33:57,033][04725] Saving new best policy, reward=4.807! +[2024-10-02 02:33:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 797.7). Total num frames: 151552. Throughput: 0: 216.8. Samples: 39434. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:33:57,731][00821] Avg episode reward: [(0, '4.757')] +[2024-10-02 02:34:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 798.2). Total num frames: 155648. Throughput: 0: 206.4. Samples: 40452. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:02,738][00821] Avg episode reward: [(0, '4.745')] +[2024-10-02 02:34:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 798.7). Total num frames: 159744. Throughput: 0: 218.2. Samples: 42032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:07,736][00821] Avg episode reward: [(0, '4.699')] +[2024-10-02 02:34:10,114][04739] Updated weights for policy 0, policy_version 40 (0.1473) +[2024-10-02 02:34:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 799.2). Total num frames: 163840. Throughput: 0: 213.2. Samples: 42576. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:12,736][00821] Avg episode reward: [(0, '4.736')] +[2024-10-02 02:34:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 799.7). Total num frames: 167936. Throughput: 0: 205.4. Samples: 43522. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:17,734][00821] Avg episode reward: [(0, '4.663')] +[2024-10-02 02:34:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 800.2). Total num frames: 172032. Throughput: 0: 221.2. Samples: 45252. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:22,734][00821] Avg episode reward: [(0, '4.593')] +[2024-10-02 02:34:24,586][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000043_176128.pth... +[2024-10-02 02:34:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 800.6). Total num frames: 176128. Throughput: 0: 220.7. Samples: 45882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:27,737][00821] Avg episode reward: [(0, '4.569')] +[2024-10-02 02:34:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 801.0). Total num frames: 180224. Throughput: 0: 213.8. Samples: 47000. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:32,732][00821] Avg episode reward: [(0, '4.592')] +[2024-10-02 02:34:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 801.4). Total num frames: 184320. Throughput: 0: 212.0. Samples: 48318. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:37,732][00821] Avg episode reward: [(0, '4.491')] +[2024-10-02 02:34:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 801.8). Total num frames: 188416. Throughput: 0: 210.8. Samples: 48920. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:42,738][00821] Avg episode reward: [(0, '4.600')] +[2024-10-02 02:34:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 802.2). Total num frames: 192512. Throughput: 0: 223.6. Samples: 50514. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:47,733][00821] Avg episode reward: [(0, '4.531')] +[2024-10-02 02:34:52,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 802.5). Total num frames: 196608. Throughput: 0: 208.2. Samples: 51400. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:52,738][00821] Avg episode reward: [(0, '4.515')] +[2024-10-02 02:34:57,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 802.8). Total num frames: 200704. Throughput: 0: 205.1. Samples: 51806. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:34:57,738][00821] Avg episode reward: [(0, '4.541')] +[2024-10-02 02:34:58,727][04739] Updated weights for policy 0, policy_version 50 (0.0534) +[2024-10-02 02:35:01,228][04725] Signal inference workers to stop experience collection... (50 times) +[2024-10-02 02:35:01,288][04739] InferenceWorker_p0-w0: stopping experience collection (50 times) +[2024-10-02 02:35:02,729][00821] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 803.2). Total num frames: 204800. Throughput: 0: 227.7. Samples: 53768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:02,737][00821] Avg episode reward: [(0, '4.512')] +[2024-10-02 02:35:02,760][04725] Signal inference workers to resume experience collection... (50 times) +[2024-10-02 02:35:02,762][04739] InferenceWorker_p0-w0: resuming experience collection (50 times) +[2024-10-02 02:35:07,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 803.5). Total num frames: 208896. Throughput: 0: 209.1. Samples: 54660. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:07,736][00821] Avg episode reward: [(0, '4.516')] +[2024-10-02 02:35:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 803.8). Total num frames: 212992. Throughput: 0: 205.3. Samples: 55120. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:12,733][00821] Avg episode reward: [(0, '4.425')] +[2024-10-02 02:35:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 804.0). Total num frames: 217088. Throughput: 0: 217.0. Samples: 56766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:17,739][00821] Avg episode reward: [(0, '4.408')] +[2024-10-02 02:35:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 225280. Throughput: 0: 213.6. Samples: 57932. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:22,737][00821] Avg episode reward: [(0, '4.422')] +[2024-10-02 02:35:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 804.6). Total num frames: 225280. Throughput: 0: 215.2. Samples: 58606. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:27,736][00821] Avg episode reward: [(0, '4.503')] +[2024-10-02 02:35:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 804.8). Total num frames: 229376. Throughput: 0: 207.6. Samples: 59854. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:32,734][00821] Avg episode reward: [(0, '4.503')] +[2024-10-02 02:35:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 237568. Throughput: 0: 216.0. Samples: 61120. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:35:37,737][00821] Avg episode reward: [(0, '4.607')] +[2024-10-02 02:35:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 241664. Throughput: 0: 224.6. Samples: 61914. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:35:42,733][00821] Avg episode reward: [(0, '4.664')] +[2024-10-02 02:35:47,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 241664. Throughput: 0: 204.5. Samples: 62970. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:35:47,735][00821] Avg episode reward: [(0, '4.695')] +[2024-10-02 02:35:47,883][04739] Updated weights for policy 0, policy_version 60 (0.1082) +[2024-10-02 02:35:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 249856. Throughput: 0: 209.6. Samples: 64092. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:52,734][00821] Avg episode reward: [(0, '4.708')] +[2024-10-02 02:35:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 253952. Throughput: 0: 220.8. Samples: 65054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:35:57,734][00821] Avg episode reward: [(0, '4.702')] +[2024-10-02 02:36:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 258048. Throughput: 0: 208.5. Samples: 66150. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:36:02,733][00821] Avg episode reward: [(0, '4.761')] +[2024-10-02 02:36:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 262144. Throughput: 0: 207.0. Samples: 67248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:36:07,731][00821] Avg episode reward: [(0, '4.745')] +[2024-10-02 02:36:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 266240. Throughput: 0: 211.9. Samples: 68142. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:36:12,733][00821] Avg episode reward: [(0, '4.645')] +[2024-10-02 02:36:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 270336. Throughput: 0: 213.4. Samples: 69458. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:36:17,736][00821] Avg episode reward: [(0, '4.694')] +[2024-10-02 02:36:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 274432. Throughput: 0: 209.9. Samples: 70564. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:36:22,732][00821] Avg episode reward: [(0, '4.677')] +[2024-10-02 02:36:26,190][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000068_278528.pth... +[2024-10-02 02:36:26,295][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000018_73728.pth +[2024-10-02 02:36:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 278528. Throughput: 0: 206.0. Samples: 71182. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:36:27,732][00821] Avg episode reward: [(0, '4.578')] +[2024-10-02 02:36:32,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 282624. Throughput: 0: 210.8. Samples: 72458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:36:32,732][00821] Avg episode reward: [(0, '4.620')] +[2024-10-02 02:36:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 286720. Throughput: 0: 202.4. Samples: 73198. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:36:37,736][00821] Avg episode reward: [(0, '4.611')] +[2024-10-02 02:36:38,112][04739] Updated weights for policy 0, policy_version 70 (0.2529) +[2024-10-02 02:36:42,734][00821] Fps is (10 sec: 409.4, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 286720. Throughput: 0: 188.6. Samples: 73544. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:36:42,746][00821] Avg episode reward: [(0, '4.653')] +[2024-10-02 02:36:47,731][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 290816. Throughput: 0: 180.0. Samples: 74250. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:36:47,738][00821] Avg episode reward: [(0, '4.689')] +[2024-10-02 02:36:52,729][00821] Fps is (10 sec: 409.8, 60 sec: 682.7, 300 sec: 819.2). Total num frames: 290816. Throughput: 0: 180.1. Samples: 75354. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:36:52,741][00821] Avg episode reward: [(0, '4.604')] +[2024-10-02 02:36:57,729][00821] Fps is (10 sec: 819.3, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 299008. Throughput: 0: 177.4. Samples: 76124. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:36:57,734][00821] Avg episode reward: [(0, '4.624')] +[2024-10-02 02:37:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 819.2). Total num frames: 299008. Throughput: 0: 174.4. Samples: 77304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:37:02,736][00821] Avg episode reward: [(0, '4.709')] +[2024-10-02 02:37:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 819.2). Total num frames: 303104. Throughput: 0: 175.8. Samples: 78474. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:07,734][00821] Avg episode reward: [(0, '4.788')] +[2024-10-02 02:37:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 311296. Throughput: 0: 179.4. Samples: 79254. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:37:12,738][00821] Avg episode reward: [(0, '4.618')] +[2024-10-02 02:37:17,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 315392. Throughput: 0: 177.8. Samples: 80460. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:37:17,736][00821] Avg episode reward: [(0, '4.679')] +[2024-10-02 02:37:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 819.2). Total num frames: 315392. Throughput: 0: 184.5. Samples: 81500. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:37:22,732][00821] Avg episode reward: [(0, '4.705')] +[2024-10-02 02:37:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 323584. Throughput: 0: 187.9. Samples: 82000. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:37:27,734][00821] Avg episode reward: [(0, '4.721')] +[2024-10-02 02:37:31,589][04739] Updated weights for policy 0, policy_version 80 (0.2276) +[2024-10-02 02:37:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 327680. Throughput: 0: 206.5. Samples: 83542. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:32,732][00821] Avg episode reward: [(0, '4.690')] +[2024-10-02 02:37:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 331776. Throughput: 0: 211.7. Samples: 84882. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:37,732][00821] Avg episode reward: [(0, '4.738')] +[2024-10-02 02:37:42,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 335872. Throughput: 0: 208.8. Samples: 85520. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:42,744][00821] Avg episode reward: [(0, '4.774')] +[2024-10-02 02:37:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 339968. Throughput: 0: 206.4. Samples: 86594. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:47,731][00821] Avg episode reward: [(0, '4.670')] +[2024-10-02 02:37:52,730][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 344064. Throughput: 0: 214.0. Samples: 88102. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:52,738][00821] Avg episode reward: [(0, '4.618')] +[2024-10-02 02:37:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 348160. Throughput: 0: 209.0. Samples: 88660. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:37:57,734][00821] Avg episode reward: [(0, '4.525')] +[2024-10-02 02:38:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 352256. Throughput: 0: 204.4. Samples: 89656. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:02,737][00821] Avg episode reward: [(0, '4.476')] +[2024-10-02 02:38:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 356352. Throughput: 0: 215.9. Samples: 91214. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:07,739][00821] Avg episode reward: [(0, '4.548')] +[2024-10-02 02:38:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 360448. Throughput: 0: 218.0. Samples: 91808. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:12,732][00821] Avg episode reward: [(0, '4.506')] +[2024-10-02 02:38:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 364544. Throughput: 0: 205.8. Samples: 92802. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:17,732][00821] Avg episode reward: [(0, '4.484')] +[2024-10-02 02:38:21,303][04739] Updated weights for policy 0, policy_version 90 (0.1178) +[2024-10-02 02:38:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 368640. Throughput: 0: 206.6. Samples: 94180. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:22,734][00821] Avg episode reward: [(0, '4.546')] +[2024-10-02 02:38:25,421][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000091_372736.pth... +[2024-10-02 02:38:25,536][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000043_176128.pth +[2024-10-02 02:38:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 372736. Throughput: 0: 207.0. Samples: 94832. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:38:27,740][00821] Avg episode reward: [(0, '4.550')] +[2024-10-02 02:38:32,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 376832. Throughput: 0: 213.5. Samples: 96204. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:32,735][00821] Avg episode reward: [(0, '4.615')] +[2024-10-02 02:38:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 380928. Throughput: 0: 202.3. Samples: 97206. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:37,732][00821] Avg episode reward: [(0, '4.730')] +[2024-10-02 02:38:42,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 385024. Throughput: 0: 204.8. Samples: 97874. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:42,737][00821] Avg episode reward: [(0, '4.769')] +[2024-10-02 02:38:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 389120. Throughput: 0: 220.1. Samples: 99560. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:47,740][00821] Avg episode reward: [(0, '4.895')] +[2024-10-02 02:38:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 393216. Throughput: 0: 203.1. Samples: 100354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:52,736][00821] Avg episode reward: [(0, '5.045')] +[2024-10-02 02:38:55,906][04725] Saving new best policy, reward=4.895! +[2024-10-02 02:38:56,046][04725] Saving new best policy, reward=5.045! +[2024-10-02 02:38:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 397312. Throughput: 0: 203.6. Samples: 100970. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:38:57,738][00821] Avg episode reward: [(0, '5.053')] +[2024-10-02 02:39:00,106][04725] Saving new best policy, reward=5.053! +[2024-10-02 02:39:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 401408. Throughput: 0: 215.9. Samples: 102518. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:39:02,737][00821] Avg episode reward: [(0, '5.057')] +[2024-10-02 02:39:07,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 405504. Throughput: 0: 214.9. Samples: 103850. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:39:07,741][00821] Avg episode reward: [(0, '5.044')] +[2024-10-02 02:39:10,205][04725] Saving new best policy, reward=5.057! +[2024-10-02 02:39:10,204][04739] Updated weights for policy 0, policy_version 100 (0.0757) +[2024-10-02 02:39:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 409600. Throughput: 0: 205.3. Samples: 104070. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:12,732][00821] Avg episode reward: [(0, '5.136')] +[2024-10-02 02:39:14,218][04725] Signal inference workers to stop experience collection... (100 times) +[2024-10-02 02:39:14,279][04739] InferenceWorker_p0-w0: stopping experience collection (100 times) +[2024-10-02 02:39:15,664][04725] Signal inference workers to resume experience collection... (100 times) +[2024-10-02 02:39:15,665][04725] Saving new best policy, reward=5.136! +[2024-10-02 02:39:15,666][04739] InferenceWorker_p0-w0: resuming experience collection (100 times) +[2024-10-02 02:39:17,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 413696. Throughput: 0: 201.8. Samples: 105284. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:17,733][00821] Avg episode reward: [(0, '5.228')] +[2024-10-02 02:39:19,832][04725] Saving new best policy, reward=5.228! +[2024-10-02 02:39:22,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 417792. Throughput: 0: 218.2. Samples: 107024. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:22,741][00821] Avg episode reward: [(0, '5.046')] +[2024-10-02 02:39:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 421888. Throughput: 0: 209.8. Samples: 107314. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:27,731][00821] Avg episode reward: [(0, '4.918')] +[2024-10-02 02:39:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 425984. Throughput: 0: 193.1. Samples: 108250. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:39:32,736][00821] Avg episode reward: [(0, '4.984')] +[2024-10-02 02:39:37,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 430080. Throughput: 0: 210.6. Samples: 109830. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:39:37,734][00821] Avg episode reward: [(0, '5.011')] +[2024-10-02 02:39:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 434176. Throughput: 0: 212.0. Samples: 110510. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:42,732][00821] Avg episode reward: [(0, '4.911')] +[2024-10-02 02:39:47,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 438272. Throughput: 0: 200.1. Samples: 111524. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:39:47,734][00821] Avg episode reward: [(0, '4.959')] +[2024-10-02 02:39:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 442368. Throughput: 0: 199.8. Samples: 112840. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:39:52,738][00821] Avg episode reward: [(0, '4.974')] +[2024-10-02 02:39:57,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 446464. Throughput: 0: 208.1. Samples: 113436. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:39:57,742][00821] Avg episode reward: [(0, '4.880')] +[2024-10-02 02:39:59,032][04739] Updated weights for policy 0, policy_version 110 (0.0537) +[2024-10-02 02:40:02,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 450560. Throughput: 0: 215.6. Samples: 114984. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:40:02,737][00821] Avg episode reward: [(0, '4.824')] +[2024-10-02 02:40:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 454656. Throughput: 0: 196.8. Samples: 115878. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:40:07,733][00821] Avg episode reward: [(0, '4.899')] +[2024-10-02 02:40:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 458752. Throughput: 0: 203.2. Samples: 116458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:12,733][00821] Avg episode reward: [(0, '5.012')] +[2024-10-02 02:40:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 462848. Throughput: 0: 220.7. Samples: 118180. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:17,735][00821] Avg episode reward: [(0, '5.067')] +[2024-10-02 02:40:22,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 466944. Throughput: 0: 209.1. Samples: 119238. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:22,734][00821] Avg episode reward: [(0, '5.131')] +[2024-10-02 02:40:24,765][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000115_471040.pth... +[2024-10-02 02:40:24,897][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000068_278528.pth +[2024-10-02 02:40:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 471040. Throughput: 0: 199.5. Samples: 119488. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:27,735][00821] Avg episode reward: [(0, '5.111')] +[2024-10-02 02:40:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 475136. Throughput: 0: 215.4. Samples: 121218. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:32,737][00821] Avg episode reward: [(0, '5.140')] +[2024-10-02 02:40:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 479232. Throughput: 0: 214.0. Samples: 122470. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:37,732][00821] Avg episode reward: [(0, '5.215')] +[2024-10-02 02:40:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 483328. Throughput: 0: 209.0. Samples: 122842. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:42,733][00821] Avg episode reward: [(0, '5.228')] +[2024-10-02 02:40:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 487424. Throughput: 0: 202.8. Samples: 124112. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:40:47,742][00821] Avg episode reward: [(0, '5.306')] +[2024-10-02 02:40:48,454][04739] Updated weights for policy 0, policy_version 120 (0.1178) +[2024-10-02 02:40:52,486][04725] Saving new best policy, reward=5.306! +[2024-10-02 02:40:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 495616. Throughput: 0: 214.2. Samples: 125518. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:40:52,736][00821] Avg episode reward: [(0, '5.277')] +[2024-10-02 02:40:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 495616. Throughput: 0: 220.6. Samples: 126386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:40:57,731][00821] Avg episode reward: [(0, '5.170')] +[2024-10-02 02:41:02,731][00821] Fps is (10 sec: 409.5, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 499712. Throughput: 0: 198.4. Samples: 127110. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:41:02,737][00821] Avg episode reward: [(0, '5.053')] +[2024-10-02 02:41:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 507904. Throughput: 0: 208.4. Samples: 128618. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:41:07,736][00821] Avg episode reward: [(0, '5.106')] +[2024-10-02 02:41:12,729][00821] Fps is (10 sec: 1229.0, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 512000. Throughput: 0: 225.3. Samples: 129628. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:41:12,739][00821] Avg episode reward: [(0, '5.075')] +[2024-10-02 02:41:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 516096. Throughput: 0: 209.6. Samples: 130648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:41:17,732][00821] Avg episode reward: [(0, '5.160')] +[2024-10-02 02:41:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 520192. Throughput: 0: 204.0. Samples: 131650. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:41:22,733][00821] Avg episode reward: [(0, '5.091')] +[2024-10-02 02:41:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 524288. Throughput: 0: 218.8. Samples: 132690. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:41:27,732][00821] Avg episode reward: [(0, '4.924')] +[2024-10-02 02:41:32,730][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 528384. Throughput: 0: 215.9. Samples: 133828. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:41:32,737][00821] Avg episode reward: [(0, '5.052')] +[2024-10-02 02:41:37,086][04739] Updated weights for policy 0, policy_version 130 (0.0685) +[2024-10-02 02:41:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 532480. Throughput: 0: 205.6. Samples: 134772. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:41:37,738][00821] Avg episode reward: [(0, '5.022')] +[2024-10-02 02:41:42,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 536576. Throughput: 0: 207.7. Samples: 135732. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:41:42,732][00821] Avg episode reward: [(0, '5.108')] +[2024-10-02 02:41:47,733][00821] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 847.0). Total num frames: 540672. Throughput: 0: 216.7. Samples: 136864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:41:47,741][00821] Avg episode reward: [(0, '5.280')] +[2024-10-02 02:41:52,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 544768. Throughput: 0: 211.5. Samples: 138138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:41:52,735][00821] Avg episode reward: [(0, '5.415')] +[2024-10-02 02:41:56,542][04725] Saving new best policy, reward=5.415! +[2024-10-02 02:41:57,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 548864. Throughput: 0: 202.3. Samples: 138732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:41:57,735][00821] Avg episode reward: [(0, '5.485')] +[2024-10-02 02:42:00,701][04725] Saving new best policy, reward=5.485! +[2024-10-02 02:42:02,729][00821] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 552960. Throughput: 0: 209.1. Samples: 140056. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:02,732][00821] Avg episode reward: [(0, '5.442')] +[2024-10-02 02:42:07,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 557056. Throughput: 0: 221.4. Samples: 141614. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:07,740][00821] Avg episode reward: [(0, '5.470')] +[2024-10-02 02:42:12,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 561152. Throughput: 0: 204.2. Samples: 141878. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:12,738][00821] Avg episode reward: [(0, '5.464')] +[2024-10-02 02:42:17,729][00821] Fps is (10 sec: 819.7, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 565248. Throughput: 0: 202.8. Samples: 142956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:17,733][00821] Avg episode reward: [(0, '5.662')] +[2024-10-02 02:42:20,049][04725] Saving new best policy, reward=5.662! +[2024-10-02 02:42:22,729][00821] Fps is (10 sec: 819.7, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 569344. Throughput: 0: 224.3. Samples: 144866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:22,735][00821] Avg episode reward: [(0, '5.687')] +[2024-10-02 02:42:24,194][04739] Updated weights for policy 0, policy_version 140 (0.1015) +[2024-10-02 02:42:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 573440. Throughput: 0: 211.4. Samples: 145244. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:27,736][00821] Avg episode reward: [(0, '5.671')] +[2024-10-02 02:42:30,407][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000141_577536.pth... +[2024-10-02 02:42:30,546][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000091_372736.pth +[2024-10-02 02:42:30,578][04725] Saving new best policy, reward=5.687! +[2024-10-02 02:42:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 577536. Throughput: 0: 208.1. Samples: 146226. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:32,732][00821] Avg episode reward: [(0, '5.779')] +[2024-10-02 02:42:35,301][04725] Saving new best policy, reward=5.779! +[2024-10-02 02:42:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 581632. Throughput: 0: 215.1. Samples: 147816. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:37,732][00821] Avg episode reward: [(0, '5.887')] +[2024-10-02 02:42:39,436][04725] Saving new best policy, reward=5.887! +[2024-10-02 02:42:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 585728. Throughput: 0: 214.9. Samples: 148402. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:42,733][00821] Avg episode reward: [(0, '5.869')] +[2024-10-02 02:42:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 589824. Throughput: 0: 211.2. Samples: 149558. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:47,732][00821] Avg episode reward: [(0, '5.816')] +[2024-10-02 02:42:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 593920. Throughput: 0: 202.1. Samples: 150708. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:42:52,732][00821] Avg episode reward: [(0, '5.958')] +[2024-10-02 02:42:54,588][04725] Saving new best policy, reward=5.958! +[2024-10-02 02:42:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 598016. Throughput: 0: 211.4. Samples: 151388. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:42:57,738][00821] Avg episode reward: [(0, '6.037')] +[2024-10-02 02:43:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 602112. Throughput: 0: 225.2. Samples: 153092. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:02,738][00821] Avg episode reward: [(0, '6.096')] +[2024-10-02 02:43:03,706][04725] Saving new best policy, reward=6.037! +[2024-10-02 02:43:07,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 606208. Throughput: 0: 201.2. Samples: 153920. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:07,736][00821] Avg episode reward: [(0, '6.031')] +[2024-10-02 02:43:09,539][04725] Saving new best policy, reward=6.096! +[2024-10-02 02:43:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 610304. Throughput: 0: 204.3. Samples: 154436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:12,740][00821] Avg episode reward: [(0, '5.976')] +[2024-10-02 02:43:13,662][04739] Updated weights for policy 0, policy_version 150 (0.0079) +[2024-10-02 02:43:16,103][04725] Signal inference workers to stop experience collection... (150 times) +[2024-10-02 02:43:16,153][04739] InferenceWorker_p0-w0: stopping experience collection (150 times) +[2024-10-02 02:43:17,671][04725] Signal inference workers to resume experience collection... (150 times) +[2024-10-02 02:43:17,674][04739] InferenceWorker_p0-w0: resuming experience collection (150 times) +[2024-10-02 02:43:17,729][00821] Fps is (10 sec: 1229.0, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 618496. Throughput: 0: 220.6. Samples: 156154. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:43:17,737][00821] Avg episode reward: [(0, '5.881')] +[2024-10-02 02:43:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 618496. Throughput: 0: 207.4. Samples: 157150. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:43:22,733][00821] Avg episode reward: [(0, '5.693')] +[2024-10-02 02:43:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 622592. Throughput: 0: 204.1. Samples: 157588. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:27,731][00821] Avg episode reward: [(0, '5.680')] +[2024-10-02 02:43:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 630784. Throughput: 0: 213.5. Samples: 159164. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:32,739][00821] Avg episode reward: [(0, '5.671')] +[2024-10-02 02:43:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 634880. Throughput: 0: 213.0. Samples: 160292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:43:37,732][00821] Avg episode reward: [(0, '5.635')] +[2024-10-02 02:43:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 638976. Throughput: 0: 211.9. Samples: 160922. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:43:42,732][00821] Avg episode reward: [(0, '5.549')] +[2024-10-02 02:43:47,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 638976. Throughput: 0: 200.3. Samples: 162104. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:43:47,732][00821] Avg episode reward: [(0, '5.555')] +[2024-10-02 02:43:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 647168. Throughput: 0: 211.9. Samples: 163456. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:43:52,731][00821] Avg episode reward: [(0, '5.486')] +[2024-10-02 02:43:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 651264. Throughput: 0: 218.5. Samples: 164270. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:43:57,734][00821] Avg episode reward: [(0, '5.867')] +[2024-10-02 02:44:02,066][04739] Updated weights for policy 0, policy_version 160 (0.2113) +[2024-10-02 02:44:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 655360. Throughput: 0: 203.0. Samples: 165288. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:44:02,732][00821] Avg episode reward: [(0, '5.862')] +[2024-10-02 02:44:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 659456. Throughput: 0: 209.1. Samples: 166558. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:44:07,737][00821] Avg episode reward: [(0, '5.819')] +[2024-10-02 02:44:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 663552. Throughput: 0: 218.0. Samples: 167398. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:44:12,737][00821] Avg episode reward: [(0, '5.865')] +[2024-10-02 02:44:17,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 847.0). Total num frames: 667648. Throughput: 0: 210.7. Samples: 168648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:44:17,741][00821] Avg episode reward: [(0, '5.897')] +[2024-10-02 02:44:22,739][00821] Fps is (10 sec: 818.4, 60 sec: 887.3, 300 sec: 846.9). Total num frames: 671744. Throughput: 0: 205.3. Samples: 169532. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:44:22,742][00821] Avg episode reward: [(0, '6.205')] +[2024-10-02 02:44:26,093][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000165_675840.pth... +[2024-10-02 02:44:26,207][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000115_471040.pth +[2024-10-02 02:44:26,225][04725] Saving new best policy, reward=6.205! +[2024-10-02 02:44:27,729][00821] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 675840. Throughput: 0: 214.4. Samples: 170568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:44:27,732][00821] Avg episode reward: [(0, '6.130')] +[2024-10-02 02:44:32,729][00821] Fps is (10 sec: 820.0, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 679936. Throughput: 0: 221.0. Samples: 172050. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:44:32,732][00821] Avg episode reward: [(0, '6.185')] +[2024-10-02 02:44:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 684032. Throughput: 0: 214.0. Samples: 173086. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:44:37,737][00821] Avg episode reward: [(0, '6.237')] +[2024-10-02 02:44:41,211][04725] Saving new best policy, reward=6.237! +[2024-10-02 02:44:42,732][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 688128. Throughput: 0: 208.8. Samples: 173668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:44:42,741][00821] Avg episode reward: [(0, '6.182')] +[2024-10-02 02:44:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 692224. Throughput: 0: 217.9. Samples: 175092. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:44:47,736][00821] Avg episode reward: [(0, '6.107')] +[2024-10-02 02:44:49,474][04739] Updated weights for policy 0, policy_version 170 (0.1079) +[2024-10-02 02:44:52,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 696320. Throughput: 0: 220.4. Samples: 176474. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:44:52,734][00821] Avg episode reward: [(0, '5.994')] +[2024-10-02 02:44:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 700416. Throughput: 0: 207.5. Samples: 176734. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:44:57,734][00821] Avg episode reward: [(0, '6.017')] +[2024-10-02 02:45:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 704512. Throughput: 0: 210.5. Samples: 178118. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:45:02,733][00821] Avg episode reward: [(0, '6.337')] +[2024-10-02 02:45:04,559][04725] Saving new best policy, reward=6.337! +[2024-10-02 02:45:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 708608. Throughput: 0: 228.2. Samples: 179798. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:45:07,733][00821] Avg episode reward: [(0, '6.262')] +[2024-10-02 02:45:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 712704. Throughput: 0: 211.8. Samples: 180098. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:12,737][00821] Avg episode reward: [(0, '6.255')] +[2024-10-02 02:45:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 847.0). Total num frames: 716800. Throughput: 0: 200.6. Samples: 181076. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:17,737][00821] Avg episode reward: [(0, '6.281')] +[2024-10-02 02:45:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 847.0). Total num frames: 720896. Throughput: 0: 215.1. Samples: 182766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:22,740][00821] Avg episode reward: [(0, '6.410')] +[2024-10-02 02:45:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 724992. Throughput: 0: 211.3. Samples: 183178. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:27,734][00821] Avg episode reward: [(0, '6.305')] +[2024-10-02 02:45:28,834][04725] Saving new best policy, reward=6.410! +[2024-10-02 02:45:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 729088. Throughput: 0: 207.4. Samples: 184426. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:32,740][00821] Avg episode reward: [(0, '6.359')] +[2024-10-02 02:45:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 733184. Throughput: 0: 207.4. Samples: 185808. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:37,735][00821] Avg episode reward: [(0, '6.595')] +[2024-10-02 02:45:38,965][04739] Updated weights for policy 0, policy_version 180 (0.0533) +[2024-10-02 02:45:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 737280. Throughput: 0: 214.4. Samples: 186382. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:42,732][00821] Avg episode reward: [(0, '6.724')] +[2024-10-02 02:45:43,140][04725] Saving new best policy, reward=6.595! +[2024-10-02 02:45:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 741376. Throughput: 0: 217.5. Samples: 187904. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:45:47,734][00821] Avg episode reward: [(0, '6.872')] +[2024-10-02 02:45:48,245][04725] Saving new best policy, reward=6.724! +[2024-10-02 02:45:52,736][00821] Fps is (10 sec: 818.6, 60 sec: 819.1, 300 sec: 846.9). Total num frames: 745472. Throughput: 0: 200.2. Samples: 188810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:45:52,746][00821] Avg episode reward: [(0, '6.932')] +[2024-10-02 02:45:54,372][04725] Saving new best policy, reward=6.872! +[2024-10-02 02:45:54,512][04725] Saving new best policy, reward=6.932! +[2024-10-02 02:45:57,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 749568. Throughput: 0: 206.7. Samples: 189398. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:45:57,736][00821] Avg episode reward: [(0, '6.971')] +[2024-10-02 02:46:02,600][04725] Saving new best policy, reward=6.971! +[2024-10-02 02:46:02,729][00821] Fps is (10 sec: 1229.7, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 757760. Throughput: 0: 221.4. Samples: 191040. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:02,739][00821] Avg episode reward: [(0, '7.079')] +[2024-10-02 02:46:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 757760. Throughput: 0: 205.9. Samples: 192032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:07,741][00821] Avg episode reward: [(0, '7.212')] +[2024-10-02 02:46:08,413][04725] Saving new best policy, reward=7.079! +[2024-10-02 02:46:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 761856. Throughput: 0: 205.9. Samples: 192442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:12,732][00821] Avg episode reward: [(0, '7.371')] +[2024-10-02 02:46:13,666][04725] Saving new best policy, reward=7.212! +[2024-10-02 02:46:17,657][04725] Saving new best policy, reward=7.371! +[2024-10-02 02:46:17,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 770048. Throughput: 0: 214.7. Samples: 194088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:17,739][00821] Avg episode reward: [(0, '7.228')] +[2024-10-02 02:46:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 774144. Throughput: 0: 208.6. Samples: 195194. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:22,738][00821] Avg episode reward: [(0, '7.161')] +[2024-10-02 02:46:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 774144. Throughput: 0: 210.5. Samples: 195854. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:27,732][00821] Avg episode reward: [(0, '7.108')] +[2024-10-02 02:46:28,446][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000190_778240.pth... +[2024-10-02 02:46:28,477][04739] Updated weights for policy 0, policy_version 190 (0.0105) +[2024-10-02 02:46:28,573][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000141_577536.pth +[2024-10-02 02:46:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 778240. Throughput: 0: 204.8. Samples: 197118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:46:32,738][00821] Avg episode reward: [(0, '7.021')] +[2024-10-02 02:46:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 786432. Throughput: 0: 211.1. Samples: 198306. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:37,733][00821] Avg episode reward: [(0, '6.911')] +[2024-10-02 02:46:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 790528. Throughput: 0: 219.5. Samples: 199276. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:42,731][00821] Avg episode reward: [(0, '7.026')] +[2024-10-02 02:46:47,731][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 790528. Throughput: 0: 200.2. Samples: 200050. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:47,747][00821] Avg episode reward: [(0, '7.170')] +[2024-10-02 02:46:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 794624. Throughput: 0: 185.8. Samples: 200392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:52,733][00821] Avg episode reward: [(0, '7.090')] +[2024-10-02 02:46:57,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 798720. Throughput: 0: 196.9. Samples: 201302. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:46:57,733][00821] Avg episode reward: [(0, '6.955')] +[2024-10-02 02:47:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 802816. Throughput: 0: 190.5. Samples: 202660. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:47:02,738][00821] Avg episode reward: [(0, '7.202')] +[2024-10-02 02:47:07,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 806912. Throughput: 0: 187.8. Samples: 203646. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:47:07,734][00821] Avg episode reward: [(0, '7.382')] +[2024-10-02 02:47:11,558][04725] Saving new best policy, reward=7.382! +[2024-10-02 02:47:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 811008. Throughput: 0: 189.0. Samples: 204360. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:12,732][00821] Avg episode reward: [(0, '7.365')] +[2024-10-02 02:47:17,729][00821] Fps is (10 sec: 819.3, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 815104. Throughput: 0: 189.8. Samples: 205660. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:17,731][00821] Avg episode reward: [(0, '7.319')] +[2024-10-02 02:47:19,797][04739] Updated weights for policy 0, policy_version 200 (0.0519) +[2024-10-02 02:47:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 819200. Throughput: 0: 193.6. Samples: 207018. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:22,735][00821] Avg episode reward: [(0, '7.559')] +[2024-10-02 02:47:24,261][04725] Signal inference workers to stop experience collection... (200 times) +[2024-10-02 02:47:24,343][04739] InferenceWorker_p0-w0: stopping experience collection (200 times) +[2024-10-02 02:47:26,137][04725] Signal inference workers to resume experience collection... (200 times) +[2024-10-02 02:47:26,139][04739] InferenceWorker_p0-w0: resuming experience collection (200 times) +[2024-10-02 02:47:26,140][04725] Saving new best policy, reward=7.559! +[2024-10-02 02:47:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 823296. Throughput: 0: 181.6. Samples: 207448. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:27,735][00821] Avg episode reward: [(0, '7.676')] +[2024-10-02 02:47:31,075][04725] Saving new best policy, reward=7.676! +[2024-10-02 02:47:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 827392. Throughput: 0: 189.6. Samples: 208580. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:32,732][00821] Avg episode reward: [(0, '7.824')] +[2024-10-02 02:47:35,253][04725] Saving new best policy, reward=7.824! +[2024-10-02 02:47:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 831488. Throughput: 0: 221.5. Samples: 210360. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:37,732][00821] Avg episode reward: [(0, '8.254')] +[2024-10-02 02:47:39,988][04725] Saving new best policy, reward=8.254! +[2024-10-02 02:47:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 835584. Throughput: 0: 207.6. Samples: 210644. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:42,734][00821] Avg episode reward: [(0, '8.238')] +[2024-10-02 02:47:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 839680. Throughput: 0: 198.4. Samples: 211590. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:47,735][00821] Avg episode reward: [(0, '8.606')] +[2024-10-02 02:47:50,478][04725] Saving new best policy, reward=8.606! +[2024-10-02 02:47:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 843776. Throughput: 0: 213.5. Samples: 213252. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:52,731][00821] Avg episode reward: [(0, '8.479')] +[2024-10-02 02:47:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 847872. Throughput: 0: 209.7. Samples: 213798. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:47:57,733][00821] Avg episode reward: [(0, '8.315')] +[2024-10-02 02:48:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 851968. Throughput: 0: 204.9. Samples: 214880. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:02,737][00821] Avg episode reward: [(0, '8.525')] +[2024-10-02 02:48:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 856064. Throughput: 0: 203.4. Samples: 216172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:07,735][00821] Avg episode reward: [(0, '8.496')] +[2024-10-02 02:48:10,134][04739] Updated weights for policy 0, policy_version 210 (0.0063) +[2024-10-02 02:48:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 860160. Throughput: 0: 207.1. Samples: 216766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:12,738][00821] Avg episode reward: [(0, '8.369')] +[2024-10-02 02:48:17,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 864256. Throughput: 0: 214.9. Samples: 218252. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:17,737][00821] Avg episode reward: [(0, '8.346')] +[2024-10-02 02:48:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 868352. Throughput: 0: 194.8. Samples: 219128. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:48:22,736][00821] Avg episode reward: [(0, '8.549')] +[2024-10-02 02:48:25,464][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000213_872448.pth... +[2024-10-02 02:48:25,580][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000165_675840.pth +[2024-10-02 02:48:27,729][00821] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 872448. Throughput: 0: 201.8. Samples: 219724. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:48:27,732][00821] Avg episode reward: [(0, '8.438')] +[2024-10-02 02:48:32,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 876544. Throughput: 0: 214.7. Samples: 221254. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:48:32,740][00821] Avg episode reward: [(0, '8.208')] +[2024-10-02 02:48:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 880640. Throughput: 0: 204.8. Samples: 222468. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:48:37,747][00821] Avg episode reward: [(0, '8.223')] +[2024-10-02 02:48:42,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 884736. Throughput: 0: 199.2. Samples: 222762. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:42,739][00821] Avg episode reward: [(0, '8.534')] +[2024-10-02 02:48:47,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 888832. Throughput: 0: 210.4. Samples: 224350. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:48:47,739][00821] Avg episode reward: [(0, '8.536')] +[2024-10-02 02:48:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 892928. Throughput: 0: 215.4. Samples: 225864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:48:52,732][00821] Avg episode reward: [(0, '8.177')] +[2024-10-02 02:48:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 897024. Throughput: 0: 207.4. Samples: 226098. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:48:57,735][00821] Avg episode reward: [(0, '7.987')] +[2024-10-02 02:49:00,261][04739] Updated weights for policy 0, policy_version 220 (0.0547) +[2024-10-02 02:49:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 901120. Throughput: 0: 200.6. Samples: 227278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:02,731][00821] Avg episode reward: [(0, '8.205')] +[2024-10-02 02:49:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 905216. Throughput: 0: 217.1. Samples: 228896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:07,733][00821] Avg episode reward: [(0, '8.083')] +[2024-10-02 02:49:12,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 909312. Throughput: 0: 217.6. Samples: 229518. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:12,735][00821] Avg episode reward: [(0, '8.036')] +[2024-10-02 02:49:17,733][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 913408. Throughput: 0: 199.6. Samples: 230236. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:17,742][00821] Avg episode reward: [(0, '7.938')] +[2024-10-02 02:49:22,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 917504. Throughput: 0: 211.0. Samples: 231964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:22,739][00821] Avg episode reward: [(0, '7.948')] +[2024-10-02 02:49:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 921600. Throughput: 0: 217.2. Samples: 232534. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:27,738][00821] Avg episode reward: [(0, '8.244')] +[2024-10-02 02:49:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 925696. Throughput: 0: 208.2. Samples: 233720. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:49:32,731][00821] Avg episode reward: [(0, '8.146')] +[2024-10-02 02:49:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 929792. Throughput: 0: 201.6. Samples: 234938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:49:37,740][00821] Avg episode reward: [(0, '8.381')] +[2024-10-02 02:49:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 933888. Throughput: 0: 206.0. Samples: 235366. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:49:42,738][00821] Avg episode reward: [(0, '7.982')] +[2024-10-02 02:49:47,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 937984. Throughput: 0: 217.7. Samples: 237076. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:49:47,733][00821] Avg episode reward: [(0, '8.044')] +[2024-10-02 02:49:48,070][04739] Updated weights for policy 0, policy_version 230 (0.1166) +[2024-10-02 02:49:52,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 942080. Throughput: 0: 202.3. Samples: 238002. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:52,734][00821] Avg episode reward: [(0, '7.990')] +[2024-10-02 02:49:57,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 946176. Throughput: 0: 199.4. Samples: 238490. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:49:57,742][00821] Avg episode reward: [(0, '8.018')] +[2024-10-02 02:50:02,729][00821] Fps is (10 sec: 1229.0, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 954368. Throughput: 0: 218.7. Samples: 240078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:50:02,746][00821] Avg episode reward: [(0, '8.031')] +[2024-10-02 02:50:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 954368. Throughput: 0: 206.8. Samples: 241272. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:50:07,732][00821] Avg episode reward: [(0, '8.435')] +[2024-10-02 02:50:12,730][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 958464. Throughput: 0: 203.6. Samples: 241698. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:50:12,738][00821] Avg episode reward: [(0, '8.497')] +[2024-10-02 02:50:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 962560. Throughput: 0: 209.8. Samples: 243162. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:50:17,732][00821] Avg episode reward: [(0, '8.680')] +[2024-10-02 02:50:22,535][04725] Saving new best policy, reward=8.680! +[2024-10-02 02:50:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 970752. Throughput: 0: 210.4. Samples: 244408. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:50:22,731][00821] Avg episode reward: [(0, '8.678')] +[2024-10-02 02:50:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 970752. Throughput: 0: 215.4. Samples: 245058. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:50:27,733][00821] Avg episode reward: [(0, '8.607')] +[2024-10-02 02:50:28,977][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000238_974848.pth... +[2024-10-02 02:50:29,106][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000190_778240.pth +[2024-10-02 02:50:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 974848. Throughput: 0: 197.6. Samples: 245968. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:50:32,739][00821] Avg episode reward: [(0, '8.957')] +[2024-10-02 02:50:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 978944. Throughput: 0: 210.9. Samples: 247490. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:50:37,739][00821] Avg episode reward: [(0, '8.533')] +[2024-10-02 02:50:38,134][04725] Saving new best policy, reward=8.957! +[2024-10-02 02:50:38,145][04739] Updated weights for policy 0, policy_version 240 (0.1049) +[2024-10-02 02:50:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 983040. Throughput: 0: 219.7. Samples: 248376. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:50:42,733][00821] Avg episode reward: [(0, '8.241')] +[2024-10-02 02:50:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 987136. Throughput: 0: 205.7. Samples: 249334. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:50:47,733][00821] Avg episode reward: [(0, '8.471')] +[2024-10-02 02:50:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 991232. Throughput: 0: 206.8. Samples: 250580. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:50:52,737][00821] Avg episode reward: [(0, '8.124')] +[2024-10-02 02:50:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 995328. Throughput: 0: 211.5. Samples: 251216. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:50:57,735][00821] Avg episode reward: [(0, '8.068')] +[2024-10-02 02:51:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 999424. Throughput: 0: 209.2. Samples: 252574. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:51:02,738][00821] Avg episode reward: [(0, '8.198')] +[2024-10-02 02:51:07,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1003520. Throughput: 0: 200.8. Samples: 253446. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:51:07,738][00821] Avg episode reward: [(0, '8.273')] +[2024-10-02 02:51:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1007616. Throughput: 0: 199.4. Samples: 254032. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:51:12,742][00821] Avg episode reward: [(0, '8.092')] +[2024-10-02 02:51:17,729][00821] Fps is (10 sec: 1229.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1015808. Throughput: 0: 215.6. Samples: 255672. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:17,734][00821] Avg episode reward: [(0, '7.872')] +[2024-10-02 02:51:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1015808. Throughput: 0: 204.0. Samples: 256670. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:22,732][00821] Avg episode reward: [(0, '7.847')] +[2024-10-02 02:51:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1019904. Throughput: 0: 194.8. Samples: 257140. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:27,739][00821] Avg episode reward: [(0, '8.134')] +[2024-10-02 02:51:28,468][04739] Updated weights for policy 0, policy_version 250 (0.1678) +[2024-10-02 02:51:30,973][04725] Signal inference workers to stop experience collection... (250 times) +[2024-10-02 02:51:31,049][04739] InferenceWorker_p0-w0: stopping experience collection (250 times) +[2024-10-02 02:51:32,518][04725] Signal inference workers to resume experience collection... (250 times) +[2024-10-02 02:51:32,521][04739] InferenceWorker_p0-w0: resuming experience collection (250 times) +[2024-10-02 02:51:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1028096. Throughput: 0: 209.0. Samples: 258740. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:51:32,735][00821] Avg episode reward: [(0, '8.310')] +[2024-10-02 02:51:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1032192. Throughput: 0: 206.7. Samples: 259880. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:37,732][00821] Avg episode reward: [(0, '7.837')] +[2024-10-02 02:51:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1032192. Throughput: 0: 206.8. Samples: 260524. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:42,739][00821] Avg episode reward: [(0, '8.043')] +[2024-10-02 02:51:47,731][00821] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 1040384. Throughput: 0: 203.4. Samples: 261728. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:47,741][00821] Avg episode reward: [(0, '8.257')] +[2024-10-02 02:51:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1044480. Throughput: 0: 211.9. Samples: 262980. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:51:52,733][00821] Avg episode reward: [(0, '8.722')] +[2024-10-02 02:51:57,730][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1048576. Throughput: 0: 217.4. Samples: 263816. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:51:57,733][00821] Avg episode reward: [(0, '8.821')] +[2024-10-02 02:52:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1048576. Throughput: 0: 201.4. Samples: 264736. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:52:02,731][00821] Avg episode reward: [(0, '9.080')] +[2024-10-02 02:52:07,391][04725] Saving new best policy, reward=9.080! +[2024-10-02 02:52:07,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1056768. Throughput: 0: 204.9. Samples: 265890. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:52:07,732][00821] Avg episode reward: [(0, '9.118')] +[2024-10-02 02:52:11,643][04725] Saving new best policy, reward=9.118! +[2024-10-02 02:52:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1060864. Throughput: 0: 217.7. Samples: 266938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:52:12,731][00821] Avg episode reward: [(0, '9.052')] +[2024-10-02 02:52:17,620][04739] Updated weights for policy 0, policy_version 260 (0.0565) +[2024-10-02 02:52:17,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1064960. Throughput: 0: 205.5. Samples: 267988. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:52:17,738][00821] Avg episode reward: [(0, '9.042')] +[2024-10-02 02:52:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1064960. Throughput: 0: 202.1. Samples: 268974. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:52:22,740][00821] Avg episode reward: [(0, '9.239')] +[2024-10-02 02:52:27,729][00821] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1069056. Throughput: 0: 201.9. Samples: 269610. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:52:27,732][00821] Avg episode reward: [(0, '9.372')] +[2024-10-02 02:52:27,912][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000262_1073152.pth... +[2024-10-02 02:52:28,029][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000213_872448.pth +[2024-10-02 02:52:28,049][04725] Saving new best policy, reward=9.239! +[2024-10-02 02:52:32,664][04725] Saving new best policy, reward=9.372! +[2024-10-02 02:52:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1077248. Throughput: 0: 207.4. Samples: 271060. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:52:32,732][00821] Avg episode reward: [(0, '9.523')] +[2024-10-02 02:52:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1077248. Throughput: 0: 201.4. Samples: 272042. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:52:37,732][00821] Avg episode reward: [(0, '9.219')] +[2024-10-02 02:52:39,226][04725] Saving new best policy, reward=9.523! +[2024-10-02 02:52:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1081344. Throughput: 0: 192.5. Samples: 272478. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:52:42,739][00821] Avg episode reward: [(0, '9.030')] +[2024-10-02 02:52:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1089536. Throughput: 0: 210.2. Samples: 274194. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:52:47,737][00821] Avg episode reward: [(0, '8.903')] +[2024-10-02 02:52:52,730][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1089536. Throughput: 0: 206.7. Samples: 275190. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:52:52,736][00821] Avg episode reward: [(0, '9.048')] +[2024-10-02 02:52:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1093632. Throughput: 0: 196.0. Samples: 275756. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:52:57,735][00821] Avg episode reward: [(0, '8.816')] +[2024-10-02 02:53:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1097728. Throughput: 0: 203.3. Samples: 277138. Policy #0 lag: (min: 1.0, avg: 1.8, max: 3.0) +[2024-10-02 02:53:02,739][00821] Avg episode reward: [(0, '8.833')] +[2024-10-02 02:53:07,202][04739] Updated weights for policy 0, policy_version 270 (0.0573) +[2024-10-02 02:53:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1105920. Throughput: 0: 207.1. Samples: 278294. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:53:07,736][00821] Avg episode reward: [(0, '8.737')] +[2024-10-02 02:53:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1105920. Throughput: 0: 210.2. Samples: 279068. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:53:12,733][00821] Avg episode reward: [(0, '8.801')] +[2024-10-02 02:53:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 1110016. Throughput: 0: 196.6. Samples: 279908. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:53:17,737][00821] Avg episode reward: [(0, '8.762')] +[2024-10-02 02:53:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1118208. Throughput: 0: 206.4. Samples: 281332. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:53:22,737][00821] Avg episode reward: [(0, '8.651')] +[2024-10-02 02:53:27,737][00821] Fps is (10 sec: 1227.8, 60 sec: 887.3, 300 sec: 833.1). Total num frames: 1122304. Throughput: 0: 218.6. Samples: 282318. Policy #0 lag: (min: 1.0, avg: 1.8, max: 3.0) +[2024-10-02 02:53:27,745][00821] Avg episode reward: [(0, '8.763')] +[2024-10-02 02:53:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1122304. Throughput: 0: 201.2. Samples: 283248. Policy #0 lag: (min: 1.0, avg: 1.8, max: 3.0) +[2024-10-02 02:53:32,734][00821] Avg episode reward: [(0, '8.842')] +[2024-10-02 02:53:37,729][00821] Fps is (10 sec: 409.9, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1126400. Throughput: 0: 203.3. Samples: 284340. Policy #0 lag: (min: 1.0, avg: 1.8, max: 3.0) +[2024-10-02 02:53:37,731][00821] Avg episode reward: [(0, '8.985')] +[2024-10-02 02:53:42,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1134592. Throughput: 0: 212.8. Samples: 285334. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:53:42,735][00821] Avg episode reward: [(0, '9.028')] +[2024-10-02 02:53:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1138688. Throughput: 0: 206.8. Samples: 286444. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:53:47,733][00821] Avg episode reward: [(0, '9.209')] +[2024-10-02 02:53:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1138688. Throughput: 0: 202.6. Samples: 287410. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:53:52,737][00821] Avg episode reward: [(0, '9.442')] +[2024-10-02 02:53:57,420][04739] Updated weights for policy 0, policy_version 280 (0.1333) +[2024-10-02 02:53:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1146880. Throughput: 0: 205.7. Samples: 288324. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:53:57,732][00821] Avg episode reward: [(0, '9.582')] +[2024-10-02 02:54:01,511][04725] Saving new best policy, reward=9.582! +[2024-10-02 02:54:02,731][00821] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 1150976. Throughput: 0: 213.3. Samples: 289506. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:54:02,739][00821] Avg episode reward: [(0, '9.572')] +[2024-10-02 02:54:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1155072. Throughput: 0: 204.3. Samples: 290524. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:54:07,734][00821] Avg episode reward: [(0, '9.436')] +[2024-10-02 02:54:12,729][00821] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1155072. Throughput: 0: 196.8. Samples: 291174. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:54:12,739][00821] Avg episode reward: [(0, '9.567')] +[2024-10-02 02:54:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1163264. Throughput: 0: 207.8. Samples: 292598. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:17,738][00821] Avg episode reward: [(0, '9.853')] +[2024-10-02 02:54:21,376][04725] Saving new best policy, reward=9.853! +[2024-10-02 02:54:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1167360. Throughput: 0: 211.1. Samples: 293838. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:22,737][00821] Avg episode reward: [(0, '9.990')] +[2024-10-02 02:54:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 1167360. Throughput: 0: 203.5. Samples: 294490. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:27,732][00821] Avg episode reward: [(0, '10.061')] +[2024-10-02 02:54:27,932][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000286_1171456.pth... +[2024-10-02 02:54:28,050][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000238_974848.pth +[2024-10-02 02:54:28,078][04725] Saving new best policy, reward=9.990! +[2024-10-02 02:54:32,620][04725] Saving new best policy, reward=10.061! +[2024-10-02 02:54:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1175552. Throughput: 0: 203.4. Samples: 295596. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:32,731][00821] Avg episode reward: [(0, '9.976')] +[2024-10-02 02:54:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1179648. Throughput: 0: 205.6. Samples: 296664. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:37,731][00821] Avg episode reward: [(0, '9.853')] +[2024-10-02 02:54:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1183744. Throughput: 0: 205.1. Samples: 297554. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:42,739][00821] Avg episode reward: [(0, '9.785')] +[2024-10-02 02:54:47,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1183744. Throughput: 0: 200.3. Samples: 298520. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 02:54:47,739][00821] Avg episode reward: [(0, '9.885')] +[2024-10-02 02:54:48,105][04739] Updated weights for policy 0, policy_version 290 (0.1039) +[2024-10-02 02:54:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1191936. Throughput: 0: 203.4. Samples: 299678. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 02:54:52,732][00821] Avg episode reward: [(0, '9.859')] +[2024-10-02 02:54:57,732][00821] Fps is (10 sec: 1228.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1196032. Throughput: 0: 210.7. Samples: 300656. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:54:57,737][00821] Avg episode reward: [(0, '9.750')] +[2024-10-02 02:55:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1200128. Throughput: 0: 200.1. Samples: 301604. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:55:02,732][00821] Avg episode reward: [(0, '9.874')] +[2024-10-02 02:55:07,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1204224. Throughput: 0: 195.0. Samples: 302614. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:55:07,733][00821] Avg episode reward: [(0, '9.763')] +[2024-10-02 02:55:12,730][00821] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1208320. Throughput: 0: 202.4. Samples: 303596. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:55:12,742][00821] Avg episode reward: [(0, '9.485')] +[2024-10-02 02:55:17,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1212416. Throughput: 0: 200.1. Samples: 304600. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:17,737][00821] Avg episode reward: [(0, '9.702')] +[2024-10-02 02:55:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1212416. Throughput: 0: 199.5. Samples: 305640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:22,733][00821] Avg episode reward: [(0, '9.722')] +[2024-10-02 02:55:27,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1220608. Throughput: 0: 197.4. Samples: 306436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:27,741][00821] Avg episode reward: [(0, '9.637')] +[2024-10-02 02:55:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1224704. Throughput: 0: 204.1. Samples: 307706. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:32,738][00821] Avg episode reward: [(0, '9.424')] +[2024-10-02 02:55:37,459][04739] Updated weights for policy 0, policy_version 300 (0.1093) +[2024-10-02 02:55:37,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1228800. Throughput: 0: 201.3. Samples: 308736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:37,740][00821] Avg episode reward: [(0, '9.422')] +[2024-10-02 02:55:42,387][04725] Signal inference workers to stop experience collection... (300 times) +[2024-10-02 02:55:42,461][04739] InferenceWorker_p0-w0: stopping experience collection (300 times) +[2024-10-02 02:55:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1228800. Throughput: 0: 192.9. Samples: 309338. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:42,732][00821] Avg episode reward: [(0, '9.365')] +[2024-10-02 02:55:43,677][04725] Signal inference workers to resume experience collection... (300 times) +[2024-10-02 02:55:43,679][04739] InferenceWorker_p0-w0: resuming experience collection (300 times) +[2024-10-02 02:55:47,729][00821] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1232896. Throughput: 0: 198.9. Samples: 310556. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:47,735][00821] Avg episode reward: [(0, '9.815')] +[2024-10-02 02:55:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1241088. Throughput: 0: 204.6. Samples: 311820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:52,735][00821] Avg episode reward: [(0, '9.842')] +[2024-10-02 02:55:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 1241088. Throughput: 0: 198.5. Samples: 312530. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:55:57,732][00821] Avg episode reward: [(0, '10.087')] +[2024-10-02 02:56:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1245184. Throughput: 0: 193.9. Samples: 313326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:02,738][00821] Avg episode reward: [(0, '9.786')] +[2024-10-02 02:56:03,642][04725] Saving new best policy, reward=10.087! +[2024-10-02 02:56:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1249280. Throughput: 0: 204.6. Samples: 314846. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:07,736][00821] Avg episode reward: [(0, '9.958')] +[2024-10-02 02:56:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1257472. Throughput: 0: 208.5. Samples: 315818. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:12,736][00821] Avg episode reward: [(0, '10.149')] +[2024-10-02 02:56:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1257472. Throughput: 0: 201.5. Samples: 316774. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:17,733][00821] Avg episode reward: [(0, '10.213')] +[2024-10-02 02:56:18,999][04725] Saving new best policy, reward=10.149! +[2024-10-02 02:56:22,734][00821] Fps is (10 sec: 409.4, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 1261568. Throughput: 0: 206.3. Samples: 318020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:22,748][00821] Avg episode reward: [(0, '10.263')] +[2024-10-02 02:56:24,192][04725] Saving new best policy, reward=10.213! +[2024-10-02 02:56:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1265664. Throughput: 0: 203.0. Samples: 318472. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:27,738][00821] Avg episode reward: [(0, '10.705')] +[2024-10-02 02:56:28,507][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000310_1269760.pth... +[2024-10-02 02:56:28,526][04739] Updated weights for policy 0, policy_version 310 (0.1667) +[2024-10-02 02:56:28,610][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000262_1073152.pth +[2024-10-02 02:56:28,625][04725] Saving new best policy, reward=10.263! +[2024-10-02 02:56:32,729][00821] Fps is (10 sec: 819.6, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1269760. Throughput: 0: 205.7. Samples: 319814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:32,734][00821] Avg episode reward: [(0, '10.886')] +[2024-10-02 02:56:34,501][04725] Saving new best policy, reward=10.705! +[2024-10-02 02:56:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 1273856. Throughput: 0: 197.1. Samples: 320688. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:56:37,736][00821] Avg episode reward: [(0, '10.802')] +[2024-10-02 02:56:39,807][04725] Saving new best policy, reward=10.886! +[2024-10-02 02:56:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1277952. Throughput: 0: 196.1. Samples: 321354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:56:42,732][00821] Avg episode reward: [(0, '10.677')] +[2024-10-02 02:56:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1282048. Throughput: 0: 217.1. Samples: 323096. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:56:47,731][00821] Avg episode reward: [(0, '11.315')] +[2024-10-02 02:56:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1286144. Throughput: 0: 204.1. Samples: 324032. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:56:52,734][00821] Avg episode reward: [(0, '11.506')] +[2024-10-02 02:56:57,009][04725] Saving new best policy, reward=11.315! +[2024-10-02 02:56:57,496][04725] Saving new best policy, reward=11.506! +[2024-10-02 02:56:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1290240. Throughput: 0: 185.6. Samples: 324172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:56:57,734][00821] Avg episode reward: [(0, '11.506')] +[2024-10-02 02:57:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1290240. Throughput: 0: 181.6. Samples: 324946. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 02:57:02,732][00821] Avg episode reward: [(0, '12.023')] +[2024-10-02 02:57:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1294336. Throughput: 0: 177.9. Samples: 326026. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:57:07,732][00821] Avg episode reward: [(0, '11.795')] +[2024-10-02 02:57:09,509][04725] Saving new best policy, reward=12.023! +[2024-10-02 02:57:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 791.4). Total num frames: 1298432. Throughput: 0: 172.8. Samples: 326248. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 02:57:12,732][00821] Avg episode reward: [(0, '11.854')] +[2024-10-02 02:57:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1302528. Throughput: 0: 167.5. Samples: 327352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 02:57:17,732][00821] Avg episode reward: [(0, '11.585')] +[2024-10-02 02:57:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 805.3). Total num frames: 1306624. Throughput: 0: 186.4. Samples: 329074. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 02:57:22,743][00821] Avg episode reward: [(0, '11.819')] +[2024-10-02 02:57:24,205][04739] Updated weights for policy 0, policy_version 320 (0.1021) +[2024-10-02 02:57:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1310720. Throughput: 0: 179.7. Samples: 329440. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:57:27,732][00821] Avg episode reward: [(0, '12.040')] +[2024-10-02 02:57:29,634][04725] Saving new best policy, reward=12.040! +[2024-10-02 02:57:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1314816. Throughput: 0: 163.4. Samples: 330448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:57:32,732][00821] Avg episode reward: [(0, '12.178')] +[2024-10-02 02:57:35,038][04725] Saving new best policy, reward=12.178! +[2024-10-02 02:57:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1318912. Throughput: 0: 176.6. Samples: 331978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 02:57:37,735][00821] Avg episode reward: [(0, '12.348')] +[2024-10-02 02:57:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1323008. Throughput: 0: 185.6. Samples: 332524. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 02:57:42,734][00821] Avg episode reward: [(0, '12.402')] +[2024-10-02 02:57:43,636][04725] Saving new best policy, reward=12.348! +[2024-10-02 02:57:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1327104. Throughput: 0: 195.6. Samples: 333748. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 02:57:47,741][00821] Avg episode reward: [(0, '12.547')] +[2024-10-02 02:57:50,188][04725] Saving new best policy, reward=12.402! +[2024-10-02 02:57:50,360][04725] Saving new best policy, reward=12.547! +[2024-10-02 02:57:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1331200. Throughput: 0: 197.2. Samples: 334902. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 02:57:52,732][00821] Avg episode reward: [(0, '12.665')] +[2024-10-02 02:57:54,774][04725] Saving new best policy, reward=12.665! +[2024-10-02 02:57:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1335296. Throughput: 0: 204.5. Samples: 335452. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 02:57:57,740][00821] Avg episode reward: [(0, '12.859')] +[2024-10-02 02:58:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1339392. Throughput: 0: 214.0. Samples: 336984. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 02:58:02,732][00821] Avg episode reward: [(0, '12.931')] +[2024-10-02 02:58:04,266][04725] Saving new best policy, reward=12.859! +[2024-10-02 02:58:07,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1343488. Throughput: 0: 196.7. Samples: 337928. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:07,741][00821] Avg episode reward: [(0, '12.931')] +[2024-10-02 02:58:10,270][04725] Saving new best policy, reward=12.931! +[2024-10-02 02:58:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1347584. Throughput: 0: 198.0. Samples: 338352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:12,739][00821] Avg episode reward: [(0, '12.385')] +[2024-10-02 02:58:14,516][04739] Updated weights for policy 0, policy_version 330 (0.0596) +[2024-10-02 02:58:17,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1351680. Throughput: 0: 214.5. Samples: 340100. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:17,738][00821] Avg episode reward: [(0, '12.089')] +[2024-10-02 02:58:22,733][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 791.4). Total num frames: 1355776. Throughput: 0: 208.7. Samples: 341370. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:22,737][00821] Avg episode reward: [(0, '12.422')] +[2024-10-02 02:58:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1359872. Throughput: 0: 199.3. Samples: 341492. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:27,737][00821] Avg episode reward: [(0, '12.393')] +[2024-10-02 02:58:29,697][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000333_1363968.pth... +[2024-10-02 02:58:29,814][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000286_1171456.pth +[2024-10-02 02:58:32,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1363968. Throughput: 0: 207.4. Samples: 343082. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:32,736][00821] Avg episode reward: [(0, '12.215')] +[2024-10-02 02:58:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1368064. Throughput: 0: 214.4. Samples: 344552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:58:37,732][00821] Avg episode reward: [(0, '12.232')] +[2024-10-02 02:58:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1372160. Throughput: 0: 211.1. Samples: 344952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:58:42,734][00821] Avg episode reward: [(0, '12.278')] +[2024-10-02 02:58:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1376256. Throughput: 0: 202.4. Samples: 346092. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:58:47,740][00821] Avg episode reward: [(0, '12.222')] +[2024-10-02 02:58:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1380352. Throughput: 0: 214.7. Samples: 347590. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:58:52,740][00821] Avg episode reward: [(0, '12.089')] +[2024-10-02 02:58:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1384448. Throughput: 0: 224.6. Samples: 348458. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:58:57,731][00821] Avg episode reward: [(0, '12.083')] +[2024-10-02 02:59:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1388544. Throughput: 0: 204.5. Samples: 349302. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:59:02,735][00821] Avg episode reward: [(0, '12.405')] +[2024-10-02 02:59:04,514][04739] Updated weights for policy 0, policy_version 340 (0.0663) +[2024-10-02 02:59:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1392640. Throughput: 0: 207.2. Samples: 350694. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 02:59:07,739][00821] Avg episode reward: [(0, '12.593')] +[2024-10-02 02:59:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1400832. Throughput: 0: 216.9. Samples: 351254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:12,737][00821] Avg episode reward: [(0, '12.004')] +[2024-10-02 02:59:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1400832. Throughput: 0: 214.9. Samples: 352754. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:17,737][00821] Avg episode reward: [(0, '11.865')] +[2024-10-02 02:59:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 805.3). Total num frames: 1404928. Throughput: 0: 203.6. Samples: 353716. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:22,738][00821] Avg episode reward: [(0, '11.770')] +[2024-10-02 02:59:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1409024. Throughput: 0: 211.0. Samples: 354448. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:27,735][00821] Avg episode reward: [(0, '12.047')] +[2024-10-02 02:59:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1417216. Throughput: 0: 216.1. Samples: 355818. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:32,734][00821] Avg episode reward: [(0, '12.406')] +[2024-10-02 02:59:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1417216. Throughput: 0: 205.2. Samples: 356822. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:37,737][00821] Avg episode reward: [(0, '13.046')] +[2024-10-02 02:59:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1421312. Throughput: 0: 196.8. Samples: 357316. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 02:59:42,732][00821] Avg episode reward: [(0, '12.875')] +[2024-10-02 02:59:43,133][04725] Saving new best policy, reward=13.046! +[2024-10-02 02:59:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1429504. Throughput: 0: 212.7. Samples: 358874. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:59:47,732][00821] Avg episode reward: [(0, '12.694')] +[2024-10-02 02:59:52,170][04739] Updated weights for policy 0, policy_version 350 (0.1506) +[2024-10-02 02:59:52,732][00821] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 805.3). Total num frames: 1433600. Throughput: 0: 205.1. Samples: 359926. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:59:52,735][00821] Avg episode reward: [(0, '12.692')] +[2024-10-02 02:59:56,611][04725] Signal inference workers to stop experience collection... (350 times) +[2024-10-02 02:59:56,790][04739] InferenceWorker_p0-w0: stopping experience collection (350 times) +[2024-10-02 02:59:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1433600. Throughput: 0: 208.0. Samples: 360616. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 02:59:57,734][00821] Avg episode reward: [(0, '12.641')] +[2024-10-02 02:59:58,481][04725] Signal inference workers to resume experience collection... (350 times) +[2024-10-02 02:59:58,482][04739] InferenceWorker_p0-w0: resuming experience collection (350 times) +[2024-10-02 03:00:02,729][00821] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 1437696. Throughput: 0: 200.0. Samples: 361752. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:02,736][00821] Avg episode reward: [(0, '12.611')] +[2024-10-02 03:00:07,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1445888. Throughput: 0: 208.5. Samples: 363098. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:00:07,737][00821] Avg episode reward: [(0, '12.950')] +[2024-10-02 03:00:12,730][00821] Fps is (10 sec: 819.1, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1445888. Throughput: 0: 210.0. Samples: 363898. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:00:12,734][00821] Avg episode reward: [(0, '12.916')] +[2024-10-02 03:00:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1449984. Throughput: 0: 199.8. Samples: 364810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:17,738][00821] Avg episode reward: [(0, '13.146')] +[2024-10-02 03:00:22,576][04725] Saving new best policy, reward=13.146! +[2024-10-02 03:00:22,729][00821] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1458176. Throughput: 0: 205.3. Samples: 366062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:22,733][00821] Avg episode reward: [(0, '13.227')] +[2024-10-02 03:00:26,881][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000357_1462272.pth... +[2024-10-02 03:00:26,983][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000310_1269760.pth +[2024-10-02 03:00:27,012][04725] Saving new best policy, reward=13.227! +[2024-10-02 03:00:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1462272. Throughput: 0: 216.4. Samples: 367054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:27,737][00821] Avg episode reward: [(0, '13.308')] +[2024-10-02 03:00:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1462272. Throughput: 0: 202.2. Samples: 367974. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:32,737][00821] Avg episode reward: [(0, '13.032')] +[2024-10-02 03:00:33,597][04725] Saving new best policy, reward=13.308! +[2024-10-02 03:00:37,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1466368. Throughput: 0: 204.3. Samples: 369118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:37,738][00821] Avg episode reward: [(0, '12.956')] +[2024-10-02 03:00:42,562][04739] Updated weights for policy 0, policy_version 360 (0.0541) +[2024-10-02 03:00:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1474560. Throughput: 0: 206.3. Samples: 369900. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:00:42,733][00821] Avg episode reward: [(0, '13.161')] +[2024-10-02 03:00:47,730][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1478656. Throughput: 0: 208.1. Samples: 371118. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:00:47,732][00821] Avg episode reward: [(0, '13.527')] +[2024-10-02 03:00:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 751.0, 300 sec: 805.3). Total num frames: 1478656. Throughput: 0: 200.5. Samples: 372122. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:00:52,737][00821] Avg episode reward: [(0, '13.825')] +[2024-10-02 03:00:53,587][04725] Saving new best policy, reward=13.527! +[2024-10-02 03:00:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1482752. Throughput: 0: 198.4. Samples: 372828. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:00:57,739][00821] Avg episode reward: [(0, '13.702')] +[2024-10-02 03:00:57,797][04725] Saving new best policy, reward=13.825! +[2024-10-02 03:01:02,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1490944. Throughput: 0: 208.9. Samples: 374210. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:01:02,737][00821] Avg episode reward: [(0, '13.505')] +[2024-10-02 03:01:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1490944. Throughput: 0: 204.6. Samples: 375268. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:01:07,737][00821] Avg episode reward: [(0, '13.293')] +[2024-10-02 03:01:12,730][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1495040. Throughput: 0: 192.1. Samples: 375700. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:01:12,739][00821] Avg episode reward: [(0, '13.461')] +[2024-10-02 03:01:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1499136. Throughput: 0: 208.4. Samples: 377352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:01:17,738][00821] Avg episode reward: [(0, '13.693')] +[2024-10-02 03:01:22,729][00821] Fps is (10 sec: 1228.9, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1507328. Throughput: 0: 205.6. Samples: 378372. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:01:22,737][00821] Avg episode reward: [(0, '14.495')] +[2024-10-02 03:01:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1507328. Throughput: 0: 202.8. Samples: 379028. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:01:27,734][00821] Avg episode reward: [(0, '14.840')] +[2024-10-02 03:01:28,921][04725] Saving new best policy, reward=14.495! +[2024-10-02 03:01:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1511424. Throughput: 0: 199.7. Samples: 380104. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:01:32,737][00821] Avg episode reward: [(0, '14.836')] +[2024-10-02 03:01:33,681][04725] Saving new best policy, reward=14.840! +[2024-10-02 03:01:33,696][04739] Updated weights for policy 0, policy_version 370 (0.0578) +[2024-10-02 03:01:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1515520. Throughput: 0: 207.2. Samples: 381444. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:01:37,732][00821] Avg episode reward: [(0, '15.368')] +[2024-10-02 03:01:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1519616. Throughput: 0: 211.2. Samples: 382334. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:01:42,732][00821] Avg episode reward: [(0, '14.797')] +[2024-10-02 03:01:43,082][04725] Saving new best policy, reward=15.368! +[2024-10-02 03:01:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1523712. Throughput: 0: 198.4. Samples: 383138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:01:47,732][00821] Avg episode reward: [(0, '14.718')] +[2024-10-02 03:01:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1527808. Throughput: 0: 206.1. Samples: 384544. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:01:52,732][00821] Avg episode reward: [(0, '14.894')] +[2024-10-02 03:01:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1536000. Throughput: 0: 214.3. Samples: 385342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:01:57,732][00821] Avg episode reward: [(0, '14.851')] +[2024-10-02 03:02:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1536000. Throughput: 0: 204.3. Samples: 386546. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:02,732][00821] Avg episode reward: [(0, '14.892')] +[2024-10-02 03:02:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1540096. Throughput: 0: 204.5. Samples: 387576. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:07,732][00821] Avg episode reward: [(0, '14.906')] +[2024-10-02 03:02:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1544192. Throughput: 0: 203.8. Samples: 388198. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:12,731][00821] Avg episode reward: [(0, '14.522')] +[2024-10-02 03:02:17,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1552384. Throughput: 0: 211.4. Samples: 389616. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:17,734][00821] Avg episode reward: [(0, '14.185')] +[2024-10-02 03:02:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1552384. Throughput: 0: 203.7. Samples: 390610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:22,733][00821] Avg episode reward: [(0, '14.392')] +[2024-10-02 03:02:23,827][04739] Updated weights for policy 0, policy_version 380 (0.0552) +[2024-10-02 03:02:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1556480. Throughput: 0: 194.0. Samples: 391066. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:27,741][00821] Avg episode reward: [(0, '14.132')] +[2024-10-02 03:02:28,201][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000381_1560576.pth... +[2024-10-02 03:02:28,323][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000333_1363968.pth +[2024-10-02 03:02:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1564672. Throughput: 0: 212.5. Samples: 392700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:32,732][00821] Avg episode reward: [(0, '14.223')] +[2024-10-02 03:02:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1564672. Throughput: 0: 203.5. Samples: 393700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:37,734][00821] Avg episode reward: [(0, '14.564')] +[2024-10-02 03:02:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1568768. Throughput: 0: 198.6. Samples: 394280. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:02:42,734][00821] Avg episode reward: [(0, '14.464')] +[2024-10-02 03:02:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1572864. Throughput: 0: 204.0. Samples: 395724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:02:47,732][00821] Avg episode reward: [(0, '13.958')] +[2024-10-02 03:02:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1581056. Throughput: 0: 206.4. Samples: 396866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:02:52,733][00821] Avg episode reward: [(0, '14.177')] +[2024-10-02 03:02:57,731][00821] Fps is (10 sec: 819.1, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1581056. Throughput: 0: 209.2. Samples: 397614. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:02:57,737][00821] Avg episode reward: [(0, '13.755')] +[2024-10-02 03:03:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1585152. Throughput: 0: 200.3. Samples: 398628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:02,739][00821] Avg episode reward: [(0, '13.764')] +[2024-10-02 03:03:07,729][00821] Fps is (10 sec: 1229.0, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1593344. Throughput: 0: 205.8. Samples: 399872. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:07,736][00821] Avg episode reward: [(0, '13.723')] +[2024-10-02 03:03:12,010][04739] Updated weights for policy 0, policy_version 390 (0.0552) +[2024-10-02 03:03:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1597440. Throughput: 0: 216.3. Samples: 400800. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:12,734][00821] Avg episode reward: [(0, '13.799')] +[2024-10-02 03:03:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1597440. Throughput: 0: 202.5. Samples: 401814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:17,732][00821] Avg episode reward: [(0, '13.901')] +[2024-10-02 03:03:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1605632. Throughput: 0: 205.1. Samples: 402928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:22,738][00821] Avg episode reward: [(0, '14.113')] +[2024-10-02 03:03:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1609728. Throughput: 0: 211.5. Samples: 403796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:03:27,732][00821] Avg episode reward: [(0, '14.301')] +[2024-10-02 03:03:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1613824. Throughput: 0: 205.2. Samples: 404960. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:32,732][00821] Avg episode reward: [(0, '14.324')] +[2024-10-02 03:03:37,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1613824. Throughput: 0: 202.0. Samples: 405954. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:37,740][00821] Avg episode reward: [(0, '14.452')] +[2024-10-02 03:03:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1622016. Throughput: 0: 205.4. Samples: 406858. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:42,739][00821] Avg episode reward: [(0, '14.723')] +[2024-10-02 03:03:47,733][00821] Fps is (10 sec: 1228.3, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 1626112. Throughput: 0: 208.6. Samples: 408014. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:47,737][00821] Avg episode reward: [(0, '15.226')] +[2024-10-02 03:03:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1630208. Throughput: 0: 204.0. Samples: 409052. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:52,735][00821] Avg episode reward: [(0, '15.140')] +[2024-10-02 03:03:57,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1634304. Throughput: 0: 197.5. Samples: 409686. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:03:57,734][00821] Avg episode reward: [(0, '15.008')] +[2024-10-02 03:04:02,227][04739] Updated weights for policy 0, policy_version 400 (0.1518) +[2024-10-02 03:04:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1638400. Throughput: 0: 205.6. Samples: 411066. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:02,733][00821] Avg episode reward: [(0, '15.228')] +[2024-10-02 03:04:04,972][04725] Signal inference workers to stop experience collection... (400 times) +[2024-10-02 03:04:05,048][04739] InferenceWorker_p0-w0: stopping experience collection (400 times) +[2024-10-02 03:04:06,460][04725] Signal inference workers to resume experience collection... (400 times) +[2024-10-02 03:04:06,466][04739] InferenceWorker_p0-w0: resuming experience collection (400 times) +[2024-10-02 03:04:07,732][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1642496. Throughput: 0: 204.9. Samples: 412150. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:07,743][00821] Avg episode reward: [(0, '15.693')] +[2024-10-02 03:04:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1642496. Throughput: 0: 199.9. Samples: 412790. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:12,732][00821] Avg episode reward: [(0, '15.693')] +[2024-10-02 03:04:12,990][04725] Saving new best policy, reward=15.693! +[2024-10-02 03:04:17,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1650688. Throughput: 0: 205.1. Samples: 414188. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:04:17,735][00821] Avg episode reward: [(0, '15.450')] +[2024-10-02 03:04:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1654784. Throughput: 0: 212.6. Samples: 415522. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:22,736][00821] Avg episode reward: [(0, '15.309')] +[2024-10-02 03:04:26,804][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000405_1658880.pth... +[2024-10-02 03:04:26,974][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000357_1462272.pth +[2024-10-02 03:04:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1658880. Throughput: 0: 207.4. Samples: 416192. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:27,733][00821] Avg episode reward: [(0, '15.272')] +[2024-10-02 03:04:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1662976. Throughput: 0: 204.5. Samples: 417216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:04:32,733][00821] Avg episode reward: [(0, '15.643')] +[2024-10-02 03:04:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1667072. Throughput: 0: 208.5. Samples: 418434. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:04:37,734][00821] Avg episode reward: [(0, '15.682')] +[2024-10-02 03:04:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1671168. Throughput: 0: 213.6. Samples: 419296. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:04:42,733][00821] Avg episode reward: [(0, '15.633')] +[2024-10-02 03:04:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 1675264. Throughput: 0: 205.5. Samples: 420314. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:04:47,732][00821] Avg episode reward: [(0, '15.993')] +[2024-10-02 03:04:52,369][04725] Saving new best policy, reward=15.993! +[2024-10-02 03:04:52,381][04739] Updated weights for policy 0, policy_version 410 (0.2302) +[2024-10-02 03:04:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1679360. Throughput: 0: 204.7. Samples: 421360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:52,734][00821] Avg episode reward: [(0, '15.767')] +[2024-10-02 03:04:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1683456. Throughput: 0: 212.9. Samples: 422372. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:04:57,732][00821] Avg episode reward: [(0, '16.405')] +[2024-10-02 03:05:00,829][04725] Saving new best policy, reward=16.405! +[2024-10-02 03:05:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1687552. Throughput: 0: 207.2. Samples: 423512. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:05:02,732][00821] Avg episode reward: [(0, '16.436')] +[2024-10-02 03:05:07,329][04725] Saving new best policy, reward=16.436! +[2024-10-02 03:05:07,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1691648. Throughput: 0: 198.2. Samples: 424440. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:05:07,742][00821] Avg episode reward: [(0, '16.376')] +[2024-10-02 03:05:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 1695744. Throughput: 0: 205.1. Samples: 425422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:05:12,732][00821] Avg episode reward: [(0, '16.294')] +[2024-10-02 03:05:17,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1699840. Throughput: 0: 205.1. Samples: 426444. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:05:17,731][00821] Avg episode reward: [(0, '16.501')] +[2024-10-02 03:05:21,019][04725] Saving new best policy, reward=16.501! +[2024-10-02 03:05:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1703936. Throughput: 0: 206.8. Samples: 427738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:05:22,735][00821] Avg episode reward: [(0, '16.658')] +[2024-10-02 03:05:27,311][04725] Saving new best policy, reward=16.658! +[2024-10-02 03:05:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1708032. Throughput: 0: 201.6. Samples: 428368. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:05:27,737][00821] Avg episode reward: [(0, '16.698')] +[2024-10-02 03:05:31,523][04725] Saving new best policy, reward=16.698! +[2024-10-02 03:05:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1712128. Throughput: 0: 204.9. Samples: 429536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:05:32,739][00821] Avg episode reward: [(0, '17.057')] +[2024-10-02 03:05:35,706][04725] Saving new best policy, reward=17.057! +[2024-10-02 03:05:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1716224. Throughput: 0: 216.1. Samples: 431084. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:05:37,732][00821] Avg episode reward: [(0, '17.501')] +[2024-10-02 03:05:41,716][04725] Saving new best policy, reward=17.501! +[2024-10-02 03:05:41,714][04739] Updated weights for policy 0, policy_version 420 (0.0530) +[2024-10-02 03:05:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1720320. Throughput: 0: 204.0. Samples: 431552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:05:42,738][00821] Avg episode reward: [(0, '17.494')] +[2024-10-02 03:05:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1724416. Throughput: 0: 201.6. Samples: 432586. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:05:47,734][00821] Avg episode reward: [(0, '16.732')] +[2024-10-02 03:05:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1728512. Throughput: 0: 214.4. Samples: 434088. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:05:52,732][00821] Avg episode reward: [(0, '16.555')] +[2024-10-02 03:05:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1732608. Throughput: 0: 204.7. Samples: 434632. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:05:57,733][00821] Avg episode reward: [(0, '16.523')] +[2024-10-02 03:06:02,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1736704. Throughput: 0: 205.6. Samples: 435696. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:06:02,735][00821] Avg episode reward: [(0, '16.931')] +[2024-10-02 03:06:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1740800. Throughput: 0: 202.6. Samples: 436854. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:06:07,732][00821] Avg episode reward: [(0, '16.307')] +[2024-10-02 03:06:12,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1744896. Throughput: 0: 208.8. Samples: 437762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:06:12,733][00821] Avg episode reward: [(0, '16.405')] +[2024-10-02 03:06:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1748992. Throughput: 0: 207.1. Samples: 438854. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:06:17,733][00821] Avg episode reward: [(0, '16.976')] +[2024-10-02 03:06:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1753088. Throughput: 0: 196.7. Samples: 439936. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:06:22,733][00821] Avg episode reward: [(0, '17.138')] +[2024-10-02 03:06:26,479][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000429_1757184.pth... +[2024-10-02 03:06:26,601][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000381_1560576.pth +[2024-10-02 03:06:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1757184. Throughput: 0: 208.0. Samples: 440914. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:06:27,740][00821] Avg episode reward: [(0, '17.039')] +[2024-10-02 03:06:30,713][04739] Updated weights for policy 0, policy_version 430 (0.0531) +[2024-10-02 03:06:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1761280. Throughput: 0: 213.3. Samples: 442184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:32,736][00821] Avg episode reward: [(0, '17.207')] +[2024-10-02 03:06:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1765376. Throughput: 0: 199.9. Samples: 443084. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:37,732][00821] Avg episode reward: [(0, '17.207')] +[2024-10-02 03:06:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1769472. Throughput: 0: 203.4. Samples: 443784. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:42,740][00821] Avg episode reward: [(0, '16.319')] +[2024-10-02 03:06:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1773568. Throughput: 0: 208.3. Samples: 445068. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:47,733][00821] Avg episode reward: [(0, '16.983')] +[2024-10-02 03:06:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1777664. Throughput: 0: 213.2. Samples: 446446. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:52,736][00821] Avg episode reward: [(0, '17.091')] +[2024-10-02 03:06:57,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1781760. Throughput: 0: 207.5. Samples: 447100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:06:57,734][00821] Avg episode reward: [(0, '17.055')] +[2024-10-02 03:07:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1785856. Throughput: 0: 205.4. Samples: 448098. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:07:02,733][00821] Avg episode reward: [(0, '16.958')] +[2024-10-02 03:07:07,730][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1785856. Throughput: 0: 202.6. Samples: 449052. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:07:07,751][00821] Avg episode reward: [(0, '16.766')] +[2024-10-02 03:07:12,736][00821] Fps is (10 sec: 409.4, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1789952. Throughput: 0: 186.4. Samples: 449304. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:07:12,744][00821] Avg episode reward: [(0, '16.801')] +[2024-10-02 03:07:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 1789952. Throughput: 0: 172.8. Samples: 449958. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:07:17,734][00821] Avg episode reward: [(0, '16.608')] +[2024-10-02 03:07:22,729][00821] Fps is (10 sec: 409.8, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 1794048. Throughput: 0: 176.7. Samples: 451036. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:07:22,738][00821] Avg episode reward: [(0, '16.257')] +[2024-10-02 03:07:27,489][04739] Updated weights for policy 0, policy_version 440 (0.2489) +[2024-10-02 03:07:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1802240. Throughput: 0: 175.2. Samples: 451668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:07:27,738][00821] Avg episode reward: [(0, '16.099')] +[2024-10-02 03:07:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1806336. Throughput: 0: 179.3. Samples: 453138. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:07:32,735][00821] Avg episode reward: [(0, '16.748')] +[2024-10-02 03:07:37,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 1806336. Throughput: 0: 171.2. Samples: 454148. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:07:37,732][00821] Avg episode reward: [(0, '16.758')] +[2024-10-02 03:07:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 1810432. Throughput: 0: 170.5. Samples: 454770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:07:42,732][00821] Avg episode reward: [(0, '16.879')] +[2024-10-02 03:07:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1818624. Throughput: 0: 180.5. Samples: 456222. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:07:47,732][00821] Avg episode reward: [(0, '17.075')] +[2024-10-02 03:07:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 1822720. Throughput: 0: 181.6. Samples: 457224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:07:52,733][00821] Avg episode reward: [(0, '17.311')] +[2024-10-02 03:07:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 1822720. Throughput: 0: 189.5. Samples: 457832. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:07:57,732][00821] Avg episode reward: [(0, '17.374')] +[2024-10-02 03:08:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1830912. Throughput: 0: 206.6. Samples: 459254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:08:02,732][00821] Avg episode reward: [(0, '17.864')] +[2024-10-02 03:08:06,497][04725] Saving new best policy, reward=17.864! +[2024-10-02 03:08:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1835008. Throughput: 0: 212.3. Samples: 460588. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:07,736][00821] Avg episode reward: [(0, '17.864')] +[2024-10-02 03:08:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 1839104. Throughput: 0: 212.9. Samples: 461248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:12,736][00821] Avg episode reward: [(0, '17.132')] +[2024-10-02 03:08:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1843200. Throughput: 0: 204.7. Samples: 462350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:17,740][00821] Avg episode reward: [(0, '17.099')] +[2024-10-02 03:08:18,173][04739] Updated weights for policy 0, policy_version 450 (0.1058) +[2024-10-02 03:08:20,716][04725] Signal inference workers to stop experience collection... (450 times) +[2024-10-02 03:08:20,773][04739] InferenceWorker_p0-w0: stopping experience collection (450 times) +[2024-10-02 03:08:21,900][04725] Signal inference workers to resume experience collection... (450 times) +[2024-10-02 03:08:21,901][04739] InferenceWorker_p0-w0: resuming experience collection (450 times) +[2024-10-02 03:08:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1847296. Throughput: 0: 206.6. Samples: 463444. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:22,731][00821] Avg episode reward: [(0, '17.340')] +[2024-10-02 03:08:26,585][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000452_1851392.pth... +[2024-10-02 03:08:26,725][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000405_1658880.pth +[2024-10-02 03:08:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1851392. Throughput: 0: 215.0. Samples: 464444. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:27,732][00821] Avg episode reward: [(0, '17.981')] +[2024-10-02 03:08:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 1851392. Throughput: 0: 205.2. Samples: 465458. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:32,732][00821] Avg episode reward: [(0, '18.526')] +[2024-10-02 03:08:33,165][04725] Saving new best policy, reward=17.981! +[2024-10-02 03:08:37,485][04725] Saving new best policy, reward=18.526! +[2024-10-02 03:08:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1859584. Throughput: 0: 205.7. Samples: 466482. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:08:37,733][00821] Avg episode reward: [(0, '18.898')] +[2024-10-02 03:08:41,683][04725] Saving new best policy, reward=18.898! +[2024-10-02 03:08:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1863680. Throughput: 0: 214.5. Samples: 467484. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:08:42,731][00821] Avg episode reward: [(0, '18.824')] +[2024-10-02 03:08:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1867776. Throughput: 0: 204.7. Samples: 468466. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:08:47,736][00821] Avg episode reward: [(0, '18.839')] +[2024-10-02 03:08:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 1867776. Throughput: 0: 197.1. Samples: 469456. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:08:52,742][00821] Avg episode reward: [(0, '18.689')] +[2024-10-02 03:08:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1875968. Throughput: 0: 204.7. Samples: 470458. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:08:57,735][00821] Avg episode reward: [(0, '18.947')] +[2024-10-02 03:09:01,103][04725] Saving new best policy, reward=18.947! +[2024-10-02 03:09:02,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1880064. Throughput: 0: 205.3. Samples: 471588. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:09:02,741][00821] Avg episode reward: [(0, '19.486')] +[2024-10-02 03:09:07,370][04725] Saving new best policy, reward=19.486! +[2024-10-02 03:09:07,379][04739] Updated weights for policy 0, policy_version 460 (0.0580) +[2024-10-02 03:09:07,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 1884160. Throughput: 0: 200.7. Samples: 472478. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:07,743][00821] Avg episode reward: [(0, '19.495')] +[2024-10-02 03:09:12,412][04725] Saving new best policy, reward=19.495! +[2024-10-02 03:09:12,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1888256. Throughput: 0: 196.2. Samples: 473274. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:12,732][00821] Avg episode reward: [(0, '19.923')] +[2024-10-02 03:09:16,539][04725] Saving new best policy, reward=19.923! +[2024-10-02 03:09:17,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1892352. Throughput: 0: 202.0. Samples: 474550. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:17,732][00821] Avg episode reward: [(0, '20.108')] +[2024-10-02 03:09:21,168][04725] Saving new best policy, reward=20.108! +[2024-10-02 03:09:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1896448. Throughput: 0: 208.2. Samples: 475850. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:22,738][00821] Avg episode reward: [(0, '19.552')] +[2024-10-02 03:09:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1900544. Throughput: 0: 200.3. Samples: 476498. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:27,732][00821] Avg episode reward: [(0, '19.850')] +[2024-10-02 03:09:32,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1904640. Throughput: 0: 203.9. Samples: 477642. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:32,732][00821] Avg episode reward: [(0, '20.200')] +[2024-10-02 03:09:35,859][04725] Saving new best policy, reward=20.200! +[2024-10-02 03:09:37,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1908736. Throughput: 0: 216.7. Samples: 479208. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:37,732][00821] Avg episode reward: [(0, '19.355')] +[2024-10-02 03:09:42,737][00821] Fps is (10 sec: 818.6, 60 sec: 819.1, 300 sec: 805.3). Total num frames: 1912832. Throughput: 0: 205.6. Samples: 479712. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:09:42,742][00821] Avg episode reward: [(0, '19.705')] +[2024-10-02 03:09:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1916928. Throughput: 0: 203.9. Samples: 480762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:09:47,740][00821] Avg episode reward: [(0, '19.520')] +[2024-10-02 03:09:52,729][00821] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 1921024. Throughput: 0: 215.8. Samples: 482186. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:09:52,731][00821] Avg episode reward: [(0, '18.974')] +[2024-10-02 03:09:55,521][04739] Updated weights for policy 0, policy_version 470 (0.0571) +[2024-10-02 03:09:57,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1925120. Throughput: 0: 212.2. Samples: 482824. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:09:57,737][00821] Avg episode reward: [(0, '18.710')] +[2024-10-02 03:10:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1929216. Throughput: 0: 207.5. Samples: 483886. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:02,734][00821] Avg episode reward: [(0, '18.789')] +[2024-10-02 03:10:07,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1933312. Throughput: 0: 203.4. Samples: 485002. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:07,734][00821] Avg episode reward: [(0, '18.235')] +[2024-10-02 03:10:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1937408. Throughput: 0: 208.7. Samples: 485888. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:12,732][00821] Avg episode reward: [(0, '17.198')] +[2024-10-02 03:10:17,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1941504. Throughput: 0: 210.9. Samples: 487132. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:17,735][00821] Avg episode reward: [(0, '16.927')] +[2024-10-02 03:10:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1945600. Throughput: 0: 198.8. Samples: 488156. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:22,741][00821] Avg episode reward: [(0, '16.749')] +[2024-10-02 03:10:25,865][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000476_1949696.pth... +[2024-10-02 03:10:25,980][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000429_1757184.pth +[2024-10-02 03:10:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1949696. Throughput: 0: 205.3. Samples: 488950. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:27,732][00821] Avg episode reward: [(0, '16.279')] +[2024-10-02 03:10:32,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1953792. Throughput: 0: 217.2. Samples: 490538. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:32,735][00821] Avg episode reward: [(0, '15.638')] +[2024-10-02 03:10:37,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1957888. Throughput: 0: 206.8. Samples: 491494. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:37,738][00821] Avg episode reward: [(0, '15.571')] +[2024-10-02 03:10:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 805.3). Total num frames: 1961984. Throughput: 0: 205.7. Samples: 492082. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:42,732][00821] Avg episode reward: [(0, '15.625')] +[2024-10-02 03:10:45,320][04739] Updated weights for policy 0, policy_version 480 (0.1148) +[2024-10-02 03:10:47,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1966080. Throughput: 0: 214.1. Samples: 493520. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:47,736][00821] Avg episode reward: [(0, '15.976')] +[2024-10-02 03:10:52,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1970176. Throughput: 0: 221.2. Samples: 494956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:10:52,733][00821] Avg episode reward: [(0, '15.967')] +[2024-10-02 03:10:57,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1974272. Throughput: 0: 206.5. Samples: 495180. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:10:57,737][00821] Avg episode reward: [(0, '16.079')] +[2024-10-02 03:11:02,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1978368. Throughput: 0: 208.8. Samples: 496530. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:02,732][00821] Avg episode reward: [(0, '15.965')] +[2024-10-02 03:11:07,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1982464. Throughput: 0: 221.3. Samples: 498114. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:07,739][00821] Avg episode reward: [(0, '15.787')] +[2024-10-02 03:11:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1986560. Throughput: 0: 212.7. Samples: 498522. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:12,740][00821] Avg episode reward: [(0, '15.960')] +[2024-10-02 03:11:17,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1990656. Throughput: 0: 198.1. Samples: 499454. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:11:17,735][00821] Avg episode reward: [(0, '16.367')] +[2024-10-02 03:11:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1994752. Throughput: 0: 215.7. Samples: 501200. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:11:22,741][00821] Avg episode reward: [(0, '16.885')] +[2024-10-02 03:11:27,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 1998848. Throughput: 0: 212.9. Samples: 501662. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:27,732][00821] Avg episode reward: [(0, '16.783')] +[2024-10-02 03:11:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 2002944. Throughput: 0: 207.4. Samples: 502854. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:32,737][00821] Avg episode reward: [(0, '16.768')] +[2024-10-02 03:11:34,707][04739] Updated weights for policy 0, policy_version 490 (0.1014) +[2024-10-02 03:11:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 2007040. Throughput: 0: 206.3. Samples: 504238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:37,743][00821] Avg episode reward: [(0, '17.223')] +[2024-10-02 03:11:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 2011136. Throughput: 0: 213.5. Samples: 504788. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:11:42,734][00821] Avg episode reward: [(0, '17.485')] +[2024-10-02 03:11:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 2015232. Throughput: 0: 215.0. Samples: 506206. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:11:47,732][00821] Avg episode reward: [(0, '17.718')] +[2024-10-02 03:11:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 2019328. Throughput: 0: 198.2. Samples: 507034. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:11:52,739][00821] Avg episode reward: [(0, '17.529')] +[2024-10-02 03:11:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 805.3). Total num frames: 2023424. Throughput: 0: 203.9. Samples: 507698. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:11:57,732][00821] Avg episode reward: [(0, '17.915')] +[2024-10-02 03:12:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2027520. Throughput: 0: 218.0. Samples: 509264. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:02,740][00821] Avg episode reward: [(0, '17.876')] +[2024-10-02 03:12:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2031616. Throughput: 0: 204.3. Samples: 510394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:07,738][00821] Avg episode reward: [(0, '17.809')] +[2024-10-02 03:12:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2035712. Throughput: 0: 200.1. Samples: 510668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:12,741][00821] Avg episode reward: [(0, '18.333')] +[2024-10-02 03:12:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2039808. Throughput: 0: 210.3. Samples: 512316. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:17,739][00821] Avg episode reward: [(0, '18.559')] +[2024-10-02 03:12:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2043904. Throughput: 0: 206.7. Samples: 513538. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:22,738][00821] Avg episode reward: [(0, '18.772')] +[2024-10-02 03:12:23,081][04739] Updated weights for policy 0, policy_version 500 (0.1040) +[2024-10-02 03:12:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2048000. Throughput: 0: 205.1. Samples: 514016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:12:27,731][00821] Avg episode reward: [(0, '19.270')] +[2024-10-02 03:12:27,802][04725] Signal inference workers to stop experience collection... (500 times) +[2024-10-02 03:12:27,894][04739] InferenceWorker_p0-w0: stopping experience collection (500 times) +[2024-10-02 03:12:29,568][04725] Signal inference workers to resume experience collection... (500 times) +[2024-10-02 03:12:29,570][04739] InferenceWorker_p0-w0: resuming experience collection (500 times) +[2024-10-02 03:12:29,581][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000501_2052096.pth... +[2024-10-02 03:12:29,716][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000452_1851392.pth +[2024-10-02 03:12:32,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2052096. Throughput: 0: 200.3. Samples: 515218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:12:32,733][00821] Avg episode reward: [(0, '18.844')] +[2024-10-02 03:12:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2056192. Throughput: 0: 212.5. Samples: 516596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:12:37,732][00821] Avg episode reward: [(0, '18.719')] +[2024-10-02 03:12:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2060288. Throughput: 0: 215.3. Samples: 517386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:12:42,735][00821] Avg episode reward: [(0, '19.090')] +[2024-10-02 03:12:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2064384. Throughput: 0: 199.5. Samples: 518240. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:12:47,735][00821] Avg episode reward: [(0, '18.861')] +[2024-10-02 03:12:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2068480. Throughput: 0: 206.1. Samples: 519668. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:12:52,732][00821] Avg episode reward: [(0, '19.191')] +[2024-10-02 03:12:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2076672. Throughput: 0: 218.6. Samples: 520504. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:12:57,733][00821] Avg episode reward: [(0, '19.263')] +[2024-10-02 03:13:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2076672. Throughput: 0: 208.8. Samples: 521714. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:13:02,731][00821] Avg episode reward: [(0, '19.355')] +[2024-10-02 03:13:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2080768. Throughput: 0: 206.0. Samples: 522808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:13:07,739][00821] Avg episode reward: [(0, '19.118')] +[2024-10-02 03:13:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2084864. Throughput: 0: 207.4. Samples: 523350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:13:12,740][00821] Avg episode reward: [(0, '19.174')] +[2024-10-02 03:13:13,119][04739] Updated weights for policy 0, policy_version 510 (0.1041) +[2024-10-02 03:13:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2088960. Throughput: 0: 214.8. Samples: 524882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:13:17,731][00821] Avg episode reward: [(0, '19.211')] +[2024-10-02 03:13:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2093056. Throughput: 0: 207.1. Samples: 525916. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:13:22,736][00821] Avg episode reward: [(0, '18.980')] +[2024-10-02 03:13:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2097152. Throughput: 0: 199.7. Samples: 526372. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:13:27,743][00821] Avg episode reward: [(0, '18.827')] +[2024-10-02 03:13:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2105344. Throughput: 0: 215.5. Samples: 527936. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:13:32,732][00821] Avg episode reward: [(0, '19.055')] +[2024-10-02 03:13:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2109440. Throughput: 0: 206.4. Samples: 528956. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:13:37,736][00821] Avg episode reward: [(0, '18.788')] +[2024-10-02 03:13:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2109440. Throughput: 0: 200.4. Samples: 529520. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:13:42,736][00821] Avg episode reward: [(0, '19.089')] +[2024-10-02 03:13:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2117632. Throughput: 0: 205.4. Samples: 530956. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:13:47,732][00821] Avg episode reward: [(0, '18.924')] +[2024-10-02 03:13:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2121728. Throughput: 0: 209.4. Samples: 532232. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:13:52,736][00821] Avg episode reward: [(0, '18.971')] +[2024-10-02 03:13:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2125824. Throughput: 0: 213.6. Samples: 532960. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:13:57,732][00821] Avg episode reward: [(0, '18.568')] +[2024-10-02 03:14:02,733][04739] Updated weights for policy 0, policy_version 520 (0.0041) +[2024-10-02 03:14:02,742][00821] Fps is (10 sec: 818.2, 60 sec: 887.3, 300 sec: 833.1). Total num frames: 2129920. Throughput: 0: 202.1. Samples: 533978. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:02,745][00821] Avg episode reward: [(0, '18.087')] +[2024-10-02 03:14:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2134016. Throughput: 0: 207.2. Samples: 535242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:07,732][00821] Avg episode reward: [(0, '18.792')] +[2024-10-02 03:14:12,737][00821] Fps is (10 sec: 819.6, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 2138112. Throughput: 0: 215.7. Samples: 536082. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:12,740][00821] Avg episode reward: [(0, '18.686')] +[2024-10-02 03:14:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2138112. Throughput: 0: 203.7. Samples: 537104. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:17,737][00821] Avg episode reward: [(0, '18.204')] +[2024-10-02 03:14:22,729][00821] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2146304. Throughput: 0: 204.8. Samples: 538170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:22,737][00821] Avg episode reward: [(0, '17.985')] +[2024-10-02 03:14:26,293][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth... +[2024-10-02 03:14:26,400][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000476_1949696.pth +[2024-10-02 03:14:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2150400. Throughput: 0: 213.5. Samples: 539128. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:27,732][00821] Avg episode reward: [(0, '17.768')] +[2024-10-02 03:14:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2154496. Throughput: 0: 204.6. Samples: 540162. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:32,732][00821] Avg episode reward: [(0, '17.555')] +[2024-10-02 03:14:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2158592. Throughput: 0: 198.5. Samples: 541166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:14:37,740][00821] Avg episode reward: [(0, '17.439')] +[2024-10-02 03:14:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2162688. Throughput: 0: 203.9. Samples: 542134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:42,740][00821] Avg episode reward: [(0, '17.603')] +[2024-10-02 03:14:47,737][00821] Fps is (10 sec: 818.5, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 2166784. Throughput: 0: 207.5. Samples: 543314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:47,741][00821] Avg episode reward: [(0, '17.436')] +[2024-10-02 03:14:52,536][04739] Updated weights for policy 0, policy_version 530 (0.1687) +[2024-10-02 03:14:52,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2170880. Throughput: 0: 199.9. Samples: 544238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:52,739][00821] Avg episode reward: [(0, '17.658')] +[2024-10-02 03:14:57,730][00821] Fps is (10 sec: 819.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2174976. Throughput: 0: 198.7. Samples: 545024. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:14:57,737][00821] Avg episode reward: [(0, '16.987')] +[2024-10-02 03:15:02,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.4, 300 sec: 833.1). Total num frames: 2179072. Throughput: 0: 203.1. Samples: 546244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:15:02,734][00821] Avg episode reward: [(0, '17.213')] +[2024-10-02 03:15:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2183168. Throughput: 0: 205.6. Samples: 547422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:15:07,739][00821] Avg episode reward: [(0, '17.171')] +[2024-10-02 03:15:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 2183168. Throughput: 0: 198.4. Samples: 548054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:15:12,733][00821] Avg episode reward: [(0, '16.908')] +[2024-10-02 03:15:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2191360. Throughput: 0: 202.8. Samples: 549290. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:17,734][00821] Avg episode reward: [(0, '17.106')] +[2024-10-02 03:15:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2195456. Throughput: 0: 210.0. Samples: 550616. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:22,732][00821] Avg episode reward: [(0, '17.129')] +[2024-10-02 03:15:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2199552. Throughput: 0: 204.6. Samples: 551340. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:27,732][00821] Avg episode reward: [(0, '16.649')] +[2024-10-02 03:15:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 2199552. Throughput: 0: 198.9. Samples: 552262. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:32,739][00821] Avg episode reward: [(0, '16.755')] +[2024-10-02 03:15:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2207744. Throughput: 0: 207.2. Samples: 553562. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:37,732][00821] Avg episode reward: [(0, '17.089')] +[2024-10-02 03:15:41,694][04739] Updated weights for policy 0, policy_version 540 (0.2259) +[2024-10-02 03:15:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2211840. Throughput: 0: 208.2. Samples: 554394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:42,736][00821] Avg episode reward: [(0, '18.073')] +[2024-10-02 03:15:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 2215936. Throughput: 0: 202.5. Samples: 555358. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:47,734][00821] Avg episode reward: [(0, '17.619')] +[2024-10-02 03:15:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2220032. Throughput: 0: 201.4. Samples: 556484. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:52,732][00821] Avg episode reward: [(0, '17.619')] +[2024-10-02 03:15:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2224128. Throughput: 0: 209.9. Samples: 557498. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:15:57,732][00821] Avg episode reward: [(0, '17.506')] +[2024-10-02 03:16:02,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 2228224. Throughput: 0: 206.2. Samples: 558568. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:02,736][00821] Avg episode reward: [(0, '17.306')] +[2024-10-02 03:16:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2232320. Throughput: 0: 197.5. Samples: 559502. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:07,733][00821] Avg episode reward: [(0, '17.983')] +[2024-10-02 03:16:12,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2236416. Throughput: 0: 201.2. Samples: 560396. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:12,732][00821] Avg episode reward: [(0, '18.821')] +[2024-10-02 03:16:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2240512. Throughput: 0: 209.7. Samples: 561698. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:17,732][00821] Avg episode reward: [(0, '18.944')] +[2024-10-02 03:16:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2244608. Throughput: 0: 205.6. Samples: 562812. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:22,737][00821] Avg episode reward: [(0, '18.804')] +[2024-10-02 03:16:27,454][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000549_2248704.pth... +[2024-10-02 03:16:27,576][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000501_2052096.pth +[2024-10-02 03:16:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2248704. Throughput: 0: 201.1. Samples: 563442. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:16:27,732][00821] Avg episode reward: [(0, '18.981')] +[2024-10-02 03:16:31,651][04739] Updated weights for policy 0, policy_version 550 (0.1161) +[2024-10-02 03:16:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2252800. Throughput: 0: 209.7. Samples: 564796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:16:32,731][00821] Avg episode reward: [(0, '19.015')] +[2024-10-02 03:16:34,385][04725] Signal inference workers to stop experience collection... (550 times) +[2024-10-02 03:16:34,444][04739] InferenceWorker_p0-w0: stopping experience collection (550 times) +[2024-10-02 03:16:35,989][04725] Signal inference workers to resume experience collection... (550 times) +[2024-10-02 03:16:35,990][04739] InferenceWorker_p0-w0: resuming experience collection (550 times) +[2024-10-02 03:16:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2256896. Throughput: 0: 215.7. Samples: 566190. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:16:37,733][00821] Avg episode reward: [(0, '18.827')] +[2024-10-02 03:16:42,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2260992. Throughput: 0: 207.8. Samples: 566848. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:16:42,733][00821] Avg episode reward: [(0, '18.997')] +[2024-10-02 03:16:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2265088. Throughput: 0: 207.4. Samples: 567900. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:16:47,738][00821] Avg episode reward: [(0, '18.805')] +[2024-10-02 03:16:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2269184. Throughput: 0: 217.9. Samples: 569308. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:16:52,736][00821] Avg episode reward: [(0, '19.265')] +[2024-10-02 03:16:57,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2273280. Throughput: 0: 210.3. Samples: 569860. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:16:57,733][00821] Avg episode reward: [(0, '19.498')] +[2024-10-02 03:17:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 2277376. Throughput: 0: 204.8. Samples: 570916. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:17:02,737][00821] Avg episode reward: [(0, '20.017')] +[2024-10-02 03:17:07,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2281472. Throughput: 0: 206.6. Samples: 572110. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:17:07,739][00821] Avg episode reward: [(0, '20.251')] +[2024-10-02 03:17:10,634][04725] Saving new best policy, reward=20.251! +[2024-10-02 03:17:12,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2285568. Throughput: 0: 211.6. Samples: 572964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:17:12,735][00821] Avg episode reward: [(0, '20.466')] +[2024-10-02 03:17:16,128][04725] Saving new best policy, reward=20.466! +[2024-10-02 03:17:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2289664. Throughput: 0: 205.0. Samples: 574022. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:17:17,735][00821] Avg episode reward: [(0, '20.488')] +[2024-10-02 03:17:22,000][04725] Saving new best policy, reward=20.488! +[2024-10-02 03:17:22,013][04739] Updated weights for policy 0, policy_version 560 (0.0534) +[2024-10-02 03:17:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2293760. Throughput: 0: 199.6. Samples: 575172. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:22,735][00821] Avg episode reward: [(0, '20.985')] +[2024-10-02 03:17:26,116][04725] Saving new best policy, reward=20.985! +[2024-10-02 03:17:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2297856. Throughput: 0: 204.2. Samples: 576038. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:27,732][00821] Avg episode reward: [(0, '21.070')] +[2024-10-02 03:17:30,281][04725] Saving new best policy, reward=21.070! +[2024-10-02 03:17:32,739][00821] Fps is (10 sec: 818.4, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 2301952. Throughput: 0: 211.4. Samples: 577416. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:32,746][00821] Avg episode reward: [(0, '21.121')] +[2024-10-02 03:17:36,428][04725] Saving new best policy, reward=21.121! +[2024-10-02 03:17:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2306048. Throughput: 0: 199.9. Samples: 578302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:37,735][00821] Avg episode reward: [(0, '21.560')] +[2024-10-02 03:17:41,567][04725] Saving new best policy, reward=21.560! +[2024-10-02 03:17:42,729][00821] Fps is (10 sec: 820.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2310144. Throughput: 0: 205.6. Samples: 579110. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:42,735][00821] Avg episode reward: [(0, '20.729')] +[2024-10-02 03:17:47,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2314240. Throughput: 0: 207.0. Samples: 580232. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:47,736][00821] Avg episode reward: [(0, '20.834')] +[2024-10-02 03:17:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2318336. Throughput: 0: 210.1. Samples: 581566. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:52,732][00821] Avg episode reward: [(0, '20.912')] +[2024-10-02 03:17:57,730][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2322432. Throughput: 0: 205.4. Samples: 582208. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:17:57,733][00821] Avg episode reward: [(0, '21.163')] +[2024-10-02 03:18:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2326528. Throughput: 0: 204.4. Samples: 583218. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:18:02,732][00821] Avg episode reward: [(0, '21.907')] +[2024-10-02 03:18:05,511][04725] Saving new best policy, reward=21.907! +[2024-10-02 03:18:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2330624. Throughput: 0: 218.1. Samples: 584986. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:18:07,741][00821] Avg episode reward: [(0, '21.318')] +[2024-10-02 03:18:11,810][04739] Updated weights for policy 0, policy_version 570 (0.1716) +[2024-10-02 03:18:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2334720. Throughput: 0: 205.3. Samples: 585276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:18:12,734][00821] Avg episode reward: [(0, '20.887')] +[2024-10-02 03:18:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2338816. Throughput: 0: 197.6. Samples: 586308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:18:17,736][00821] Avg episode reward: [(0, '20.789')] +[2024-10-02 03:18:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2342912. Throughput: 0: 208.8. Samples: 587700. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:18:22,735][00821] Avg episode reward: [(0, '21.188')] +[2024-10-02 03:18:25,134][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000573_2347008.pth... +[2024-10-02 03:18:25,254][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth +[2024-10-02 03:18:27,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2347008. Throughput: 0: 207.7. Samples: 588458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:18:27,732][00821] Avg episode reward: [(0, '21.765')] +[2024-10-02 03:18:32,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 2351104. Throughput: 0: 203.6. Samples: 589392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:18:32,739][00821] Avg episode reward: [(0, '21.353')] +[2024-10-02 03:18:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2355200. Throughput: 0: 202.6. Samples: 590682. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:18:37,737][00821] Avg episode reward: [(0, '21.744')] +[2024-10-02 03:18:42,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2359296. Throughput: 0: 205.2. Samples: 591442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:18:42,739][00821] Avg episode reward: [(0, '21.694')] +[2024-10-02 03:18:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2363392. Throughput: 0: 212.6. Samples: 592786. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:18:47,734][00821] Avg episode reward: [(0, '22.023')] +[2024-10-02 03:18:51,595][04725] Saving new best policy, reward=22.023! +[2024-10-02 03:18:52,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2367488. Throughput: 0: 195.9. Samples: 593800. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:18:52,740][00821] Avg episode reward: [(0, '21.951')] +[2024-10-02 03:18:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2371584. Throughput: 0: 206.0. Samples: 594548. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:18:57,740][00821] Avg episode reward: [(0, '21.263')] +[2024-10-02 03:19:00,439][04739] Updated weights for policy 0, policy_version 580 (0.1516) +[2024-10-02 03:19:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2375680. Throughput: 0: 214.5. Samples: 595960. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:02,742][00821] Avg episode reward: [(0, '21.439')] +[2024-10-02 03:19:07,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2379776. Throughput: 0: 206.8. Samples: 597008. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:07,735][00821] Avg episode reward: [(0, '21.439')] +[2024-10-02 03:19:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2383872. Throughput: 0: 202.0. Samples: 597548. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:19:12,732][00821] Avg episode reward: [(0, '21.252')] +[2024-10-02 03:19:17,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2387968. Throughput: 0: 208.9. Samples: 598792. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:19:17,731][00821] Avg episode reward: [(0, '20.864')] +[2024-10-02 03:19:22,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2392064. Throughput: 0: 209.2. Samples: 600096. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:19:22,737][00821] Avg episode reward: [(0, '20.852')] +[2024-10-02 03:19:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2396160. Throughput: 0: 206.4. Samples: 600732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:19:27,733][00821] Avg episode reward: [(0, '20.725')] +[2024-10-02 03:19:32,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2400256. Throughput: 0: 199.0. Samples: 601740. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:32,740][00821] Avg episode reward: [(0, '20.877')] +[2024-10-02 03:19:37,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2404352. Throughput: 0: 212.4. Samples: 603360. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:37,740][00821] Avg episode reward: [(0, '20.453')] +[2024-10-02 03:19:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2408448. Throughput: 0: 205.6. Samples: 603800. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:42,739][00821] Avg episode reward: [(0, '20.342')] +[2024-10-02 03:19:47,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2412544. Throughput: 0: 197.9. Samples: 604864. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:47,732][00821] Avg episode reward: [(0, '20.455')] +[2024-10-02 03:19:52,348][04739] Updated weights for policy 0, policy_version 590 (0.0576) +[2024-10-02 03:19:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2416640. Throughput: 0: 198.3. Samples: 605930. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:52,732][00821] Avg episode reward: [(0, '19.326')] +[2024-10-02 03:19:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2420736. Throughput: 0: 208.0. Samples: 606906. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:19:57,738][00821] Avg episode reward: [(0, '19.258')] +[2024-10-02 03:20:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2424832. Throughput: 0: 203.2. Samples: 607938. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:02,736][00821] Avg episode reward: [(0, '18.924')] +[2024-10-02 03:20:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2428928. Throughput: 0: 196.5. Samples: 608940. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:07,735][00821] Avg episode reward: [(0, '18.908')] +[2024-10-02 03:20:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2433024. Throughput: 0: 204.3. Samples: 609926. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:12,734][00821] Avg episode reward: [(0, '18.576')] +[2024-10-02 03:20:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2437120. Throughput: 0: 205.6. Samples: 610994. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:17,732][00821] Avg episode reward: [(0, '18.378')] +[2024-10-02 03:20:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2441216. Throughput: 0: 191.8. Samples: 611992. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:22,737][00821] Avg episode reward: [(0, '18.212')] +[2024-10-02 03:20:26,974][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000597_2445312.pth... +[2024-10-02 03:20:27,105][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000549_2248704.pth +[2024-10-02 03:20:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2445312. Throughput: 0: 202.4. Samples: 612910. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:27,733][00821] Avg episode reward: [(0, '18.065')] +[2024-10-02 03:20:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2449408. Throughput: 0: 204.0. Samples: 614042. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:32,732][00821] Avg episode reward: [(0, '19.312')] +[2024-10-02 03:20:37,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2453504. Throughput: 0: 208.8. Samples: 615328. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:37,737][00821] Avg episode reward: [(0, '18.967')] +[2024-10-02 03:20:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2457600. Throughput: 0: 201.6. Samples: 615978. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:42,738][00821] Avg episode reward: [(0, '18.812')] +[2024-10-02 03:20:42,841][04739] Updated weights for policy 0, policy_version 600 (0.2184) +[2024-10-02 03:20:45,684][04725] Signal inference workers to stop experience collection... (600 times) +[2024-10-02 03:20:45,731][04739] InferenceWorker_p0-w0: stopping experience collection (600 times) +[2024-10-02 03:20:46,746][04725] Signal inference workers to resume experience collection... (600 times) +[2024-10-02 03:20:46,747][04739] InferenceWorker_p0-w0: resuming experience collection (600 times) +[2024-10-02 03:20:47,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2461696. Throughput: 0: 204.4. Samples: 617138. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:20:47,732][00821] Avg episode reward: [(0, '18.411')] +[2024-10-02 03:20:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2465792. Throughput: 0: 215.4. Samples: 618632. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:52,732][00821] Avg episode reward: [(0, '17.805')] +[2024-10-02 03:20:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2469888. Throughput: 0: 204.5. Samples: 619130. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:20:57,732][00821] Avg episode reward: [(0, '17.835')] +[2024-10-02 03:21:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2473984. Throughput: 0: 203.9. Samples: 620168. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:02,734][00821] Avg episode reward: [(0, '18.543')] +[2024-10-02 03:21:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2478080. Throughput: 0: 211.6. Samples: 621516. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:07,732][00821] Avg episode reward: [(0, '18.648')] +[2024-10-02 03:21:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2482176. Throughput: 0: 205.6. Samples: 622160. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:21:12,735][00821] Avg episode reward: [(0, '19.223')] +[2024-10-02 03:21:17,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2486272. Throughput: 0: 203.1. Samples: 623184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:21:17,734][00821] Avg episode reward: [(0, '19.079')] +[2024-10-02 03:21:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2490368. Throughput: 0: 206.4. Samples: 624616. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:21:22,740][00821] Avg episode reward: [(0, '19.005')] +[2024-10-02 03:21:27,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2494464. Throughput: 0: 207.5. Samples: 625316. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:21:27,734][00821] Avg episode reward: [(0, '18.842')] +[2024-10-02 03:21:30,410][04739] Updated weights for policy 0, policy_version 610 (0.1522) +[2024-10-02 03:21:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2498560. Throughput: 0: 209.2. Samples: 626550. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:21:32,733][00821] Avg episode reward: [(0, '18.976')] +[2024-10-02 03:21:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2502656. Throughput: 0: 197.2. Samples: 627506. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:21:37,731][00821] Avg episode reward: [(0, '19.490')] +[2024-10-02 03:21:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2506752. Throughput: 0: 206.2. Samples: 628410. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:42,734][00821] Avg episode reward: [(0, '19.438')] +[2024-10-02 03:21:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2510848. Throughput: 0: 218.6. Samples: 630004. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:47,734][00821] Avg episode reward: [(0, '19.994')] +[2024-10-02 03:21:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2514944. Throughput: 0: 207.2. Samples: 630840. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:52,732][00821] Avg episode reward: [(0, '20.278')] +[2024-10-02 03:21:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2519040. Throughput: 0: 207.2. Samples: 631484. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:21:57,734][00821] Avg episode reward: [(0, '20.298')] +[2024-10-02 03:22:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2523136. Throughput: 0: 213.7. Samples: 632802. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:22:02,735][00821] Avg episode reward: [(0, '21.583')] +[2024-10-02 03:22:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2527232. Throughput: 0: 214.0. Samples: 634244. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:22:07,733][00821] Avg episode reward: [(0, '21.817')] +[2024-10-02 03:22:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2531328. Throughput: 0: 205.0. Samples: 634540. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:22:12,733][00821] Avg episode reward: [(0, '21.666')] +[2024-10-02 03:22:17,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2535424. Throughput: 0: 208.6. Samples: 635936. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:22:17,735][00821] Avg episode reward: [(0, '21.899')] +[2024-10-02 03:22:19,581][04739] Updated weights for policy 0, policy_version 620 (0.1014) +[2024-10-02 03:22:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2539520. Throughput: 0: 224.6. Samples: 637614. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 03:22:22,733][00821] Avg episode reward: [(0, '21.605')] +[2024-10-02 03:22:27,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2543616. Throughput: 0: 210.0. Samples: 637858. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 03:22:27,735][00821] Avg episode reward: [(0, '20.814')] +[2024-10-02 03:22:30,580][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000622_2547712.pth... +[2024-10-02 03:22:30,693][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000573_2347008.pth +[2024-10-02 03:22:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2547712. Throughput: 0: 197.6. Samples: 638894. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:32,732][00821] Avg episode reward: [(0, '21.537')] +[2024-10-02 03:22:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2551808. Throughput: 0: 217.1. Samples: 640608. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:37,737][00821] Avg episode reward: [(0, '21.511')] +[2024-10-02 03:22:42,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2555904. Throughput: 0: 212.9. Samples: 641064. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:42,742][00821] Avg episode reward: [(0, '21.788')] +[2024-10-02 03:22:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2560000. Throughput: 0: 205.7. Samples: 642058. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:47,732][00821] Avg episode reward: [(0, '22.146')] +[2024-10-02 03:22:50,044][04725] Saving new best policy, reward=22.146! +[2024-10-02 03:22:52,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2564096. Throughput: 0: 206.1. Samples: 643518. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:52,732][00821] Avg episode reward: [(0, '22.127')] +[2024-10-02 03:22:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2568192. Throughput: 0: 212.5. Samples: 644102. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:22:57,738][00821] Avg episode reward: [(0, '22.094')] +[2024-10-02 03:23:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2572288. Throughput: 0: 210.8. Samples: 645420. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:23:02,734][00821] Avg episode reward: [(0, '21.482')] +[2024-10-02 03:23:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2576384. Throughput: 0: 196.0. Samples: 646436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:23:07,732][00821] Avg episode reward: [(0, '20.849')] +[2024-10-02 03:23:09,483][04739] Updated weights for policy 0, policy_version 630 (0.0542) +[2024-10-02 03:23:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2580480. Throughput: 0: 205.4. Samples: 647100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:23:12,735][00821] Avg episode reward: [(0, '20.739')] +[2024-10-02 03:23:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2584576. Throughput: 0: 218.0. Samples: 648702. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:23:17,732][00821] Avg episode reward: [(0, '20.900')] +[2024-10-02 03:23:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2588672. Throughput: 0: 202.7. Samples: 649730. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:22,735][00821] Avg episode reward: [(0, '20.917')] +[2024-10-02 03:23:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2592768. Throughput: 0: 198.4. Samples: 649992. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:27,742][00821] Avg episode reward: [(0, '20.562')] +[2024-10-02 03:23:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2596864. Throughput: 0: 217.5. Samples: 651844. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:32,736][00821] Avg episode reward: [(0, '20.508')] +[2024-10-02 03:23:37,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2600960. Throughput: 0: 208.1. Samples: 652884. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:37,735][00821] Avg episode reward: [(0, '20.432')] +[2024-10-02 03:23:42,731][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2605056. Throughput: 0: 204.9. Samples: 653324. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:23:42,734][00821] Avg episode reward: [(0, '20.370')] +[2024-10-02 03:23:47,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2609152. Throughput: 0: 208.0. Samples: 654780. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:23:47,742][00821] Avg episode reward: [(0, '19.983')] +[2024-10-02 03:23:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2617344. Throughput: 0: 200.4. Samples: 655452. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:52,739][00821] Avg episode reward: [(0, '19.894')] +[2024-10-02 03:23:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2617344. Throughput: 0: 212.3. Samples: 656652. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:23:57,736][00821] Avg episode reward: [(0, '19.650')] +[2024-10-02 03:23:58,083][04739] Updated weights for policy 0, policy_version 640 (0.2204) +[2024-10-02 03:24:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2621440. Throughput: 0: 199.6. Samples: 657684. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:24:02,738][00821] Avg episode reward: [(0, '19.145')] +[2024-10-02 03:24:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2629632. Throughput: 0: 204.8. Samples: 658948. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:24:07,732][00821] Avg episode reward: [(0, '19.069')] +[2024-10-02 03:24:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2633728. Throughput: 0: 219.7. Samples: 659878. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:24:12,732][00821] Avg episode reward: [(0, '19.054')] +[2024-10-02 03:24:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2637824. Throughput: 0: 200.0. Samples: 660844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:24:17,736][00821] Avg episode reward: [(0, '18.880')] +[2024-10-02 03:24:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2641920. Throughput: 0: 201.5. Samples: 661950. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:24:22,737][00821] Avg episode reward: [(0, '19.287')] +[2024-10-02 03:24:26,726][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000646_2646016.pth... +[2024-10-02 03:24:26,836][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000597_2445312.pth +[2024-10-02 03:24:27,730][00821] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2646016. Throughput: 0: 216.0. Samples: 663046. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:24:27,732][00821] Avg episode reward: [(0, '19.389')] +[2024-10-02 03:24:32,732][00821] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 2650112. Throughput: 0: 207.3. Samples: 664108. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:24:32,739][00821] Avg episode reward: [(0, '20.420')] +[2024-10-02 03:24:37,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2654208. Throughput: 0: 215.6. Samples: 665154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:24:37,732][00821] Avg episode reward: [(0, '20.005')] +[2024-10-02 03:24:42,729][00821] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2658304. Throughput: 0: 211.6. Samples: 666176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:24:42,734][00821] Avg episode reward: [(0, '20.452')] +[2024-10-02 03:24:45,901][04739] Updated weights for policy 0, policy_version 650 (0.0678) +[2024-10-02 03:24:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2662400. Throughput: 0: 215.8. Samples: 667394. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:24:47,738][00821] Avg episode reward: [(0, '21.060')] +[2024-10-02 03:24:49,036][04725] Signal inference workers to stop experience collection... (650 times) +[2024-10-02 03:24:49,159][04739] InferenceWorker_p0-w0: stopping experience collection (650 times) +[2024-10-02 03:24:51,259][04725] Signal inference workers to resume experience collection... (650 times) +[2024-10-02 03:24:51,259][04739] InferenceWorker_p0-w0: resuming experience collection (650 times) +[2024-10-02 03:24:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2666496. Throughput: 0: 212.7. Samples: 668518. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:24:52,734][00821] Avg episode reward: [(0, '21.146')] +[2024-10-02 03:24:57,734][00821] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 2670592. Throughput: 0: 206.1. Samples: 669152. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:24:57,737][00821] Avg episode reward: [(0, '20.564')] +[2024-10-02 03:25:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 2674688. Throughput: 0: 211.2. Samples: 670348. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:02,732][00821] Avg episode reward: [(0, '20.438')] +[2024-10-02 03:25:07,729][00821] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2678784. Throughput: 0: 221.1. Samples: 671900. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:25:07,732][00821] Avg episode reward: [(0, '20.162')] +[2024-10-02 03:25:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2682880. Throughput: 0: 207.3. Samples: 672374. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:25:12,739][00821] Avg episode reward: [(0, '20.162')] +[2024-10-02 03:25:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2686976. Throughput: 0: 205.5. Samples: 673354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:17,732][00821] Avg episode reward: [(0, '20.520')] +[2024-10-02 03:25:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2691072. Throughput: 0: 219.9. Samples: 675048. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:22,732][00821] Avg episode reward: [(0, '20.420')] +[2024-10-02 03:25:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2695168. Throughput: 0: 206.8. Samples: 675482. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:25:27,732][00821] Avg episode reward: [(0, '20.133')] +[2024-10-02 03:25:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2699264. Throughput: 0: 201.0. Samples: 676438. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:25:32,731][00821] Avg episode reward: [(0, '20.482')] +[2024-10-02 03:25:36,187][04739] Updated weights for policy 0, policy_version 660 (0.1010) +[2024-10-02 03:25:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2703360. Throughput: 0: 208.0. Samples: 677880. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:37,733][00821] Avg episode reward: [(0, '20.411')] +[2024-10-02 03:25:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2707456. Throughput: 0: 208.6. Samples: 678536. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:42,739][00821] Avg episode reward: [(0, '20.931')] +[2024-10-02 03:25:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2711552. Throughput: 0: 210.0. Samples: 679800. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:25:47,732][00821] Avg episode reward: [(0, '20.672')] +[2024-10-02 03:25:52,742][00821] Fps is (10 sec: 818.2, 60 sec: 819.0, 300 sec: 833.0). Total num frames: 2715648. Throughput: 0: 197.9. Samples: 680806. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:25:52,744][00821] Avg episode reward: [(0, '20.160')] +[2024-10-02 03:25:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 2719744. Throughput: 0: 203.7. Samples: 681542. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:25:57,732][00821] Avg episode reward: [(0, '20.831')] +[2024-10-02 03:26:02,729][00821] Fps is (10 sec: 820.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2723840. Throughput: 0: 218.9. Samples: 683204. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:26:02,732][00821] Avg episode reward: [(0, '22.142')] +[2024-10-02 03:26:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2727936. Throughput: 0: 199.3. Samples: 684016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:07,736][00821] Avg episode reward: [(0, '21.546')] +[2024-10-02 03:26:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2732032. Throughput: 0: 202.1. Samples: 684578. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:12,732][00821] Avg episode reward: [(0, '21.628')] +[2024-10-02 03:26:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2736128. Throughput: 0: 216.8. Samples: 686192. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:26:17,738][00821] Avg episode reward: [(0, '21.489')] +[2024-10-02 03:26:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2740224. Throughput: 0: 211.3. Samples: 687390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:26:22,737][00821] Avg episode reward: [(0, '21.123')] +[2024-10-02 03:26:24,735][04739] Updated weights for policy 0, policy_version 670 (0.1518) +[2024-10-02 03:26:24,747][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000670_2744320.pth... +[2024-10-02 03:26:24,894][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000622_2547712.pth +[2024-10-02 03:26:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2744320. Throughput: 0: 203.9. Samples: 687710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:26:27,731][00821] Avg episode reward: [(0, '20.963')] +[2024-10-02 03:26:32,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2748416. Throughput: 0: 211.1. Samples: 689298. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:26:32,735][00821] Avg episode reward: [(0, '20.566')] +[2024-10-02 03:26:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2752512. Throughput: 0: 218.4. Samples: 690630. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:37,734][00821] Avg episode reward: [(0, '20.372')] +[2024-10-02 03:26:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2756608. Throughput: 0: 213.5. Samples: 691150. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:42,738][00821] Avg episode reward: [(0, '20.466')] +[2024-10-02 03:26:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2760704. Throughput: 0: 202.8. Samples: 692330. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:47,738][00821] Avg episode reward: [(0, '20.364')] +[2024-10-02 03:26:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.4, 300 sec: 833.1). Total num frames: 2764800. Throughput: 0: 215.2. Samples: 693702. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:26:52,731][00821] Avg episode reward: [(0, '20.462')] +[2024-10-02 03:26:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2768896. Throughput: 0: 221.9. Samples: 694564. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:26:57,734][00821] Avg episode reward: [(0, '20.611')] +[2024-10-02 03:27:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2772992. Throughput: 0: 205.3. Samples: 695432. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:27:02,736][00821] Avg episode reward: [(0, '20.742')] +[2024-10-02 03:27:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2777088. Throughput: 0: 208.2. Samples: 696760. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:27:07,739][00821] Avg episode reward: [(0, '20.401')] +[2024-10-02 03:27:12,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2785280. Throughput: 0: 217.4. Samples: 697494. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:27:12,731][00821] Avg episode reward: [(0, '20.483')] +[2024-10-02 03:27:12,822][04739] Updated weights for policy 0, policy_version 680 (0.2082) +[2024-10-02 03:27:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2785280. Throughput: 0: 205.8. Samples: 698560. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:27:17,733][00821] Avg episode reward: [(0, '20.615')] +[2024-10-02 03:27:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2789376. Throughput: 0: 201.1. Samples: 699678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:27:22,732][00821] Avg episode reward: [(0, '20.821')] +[2024-10-02 03:27:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2793472. Throughput: 0: 203.8. Samples: 700320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-10-02 03:27:27,739][00821] Avg episode reward: [(0, '21.051')] +[2024-10-02 03:27:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2801664. Throughput: 0: 213.2. Samples: 701926. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:27:32,736][00821] Avg episode reward: [(0, '20.669')] +[2024-10-02 03:27:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2801664. Throughput: 0: 205.2. Samples: 702936. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:27:37,736][00821] Avg episode reward: [(0, '21.022')] +[2024-10-02 03:27:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2805760. Throughput: 0: 197.0. Samples: 703428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:27:42,732][00821] Avg episode reward: [(0, '20.850')] +[2024-10-02 03:27:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2813952. Throughput: 0: 212.4. Samples: 704988. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:27:47,733][00821] Avg episode reward: [(0, '21.004')] +[2024-10-02 03:27:52,731][00821] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 847.0). Total num frames: 2818048. Throughput: 0: 206.3. Samples: 706044. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:27:52,734][00821] Avg episode reward: [(0, '21.004')] +[2024-10-02 03:27:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2818048. Throughput: 0: 204.8. Samples: 706710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:27:57,737][00821] Avg episode reward: [(0, '20.263')] +[2024-10-02 03:28:02,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2826240. Throughput: 0: 211.6. Samples: 708080. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:02,738][00821] Avg episode reward: [(0, '19.801')] +[2024-10-02 03:28:03,053][04739] Updated weights for policy 0, policy_version 690 (0.2115) +[2024-10-02 03:28:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2830336. Throughput: 0: 216.0. Samples: 709400. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:07,737][00821] Avg episode reward: [(0, '20.200')] +[2024-10-02 03:28:12,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 2834432. Throughput: 0: 217.5. Samples: 710108. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:12,735][00821] Avg episode reward: [(0, '19.586')] +[2024-10-02 03:28:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2838528. Throughput: 0: 203.3. Samples: 711076. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:17,738][00821] Avg episode reward: [(0, '19.356')] +[2024-10-02 03:28:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2842624. Throughput: 0: 209.0. Samples: 712342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:22,736][00821] Avg episode reward: [(0, '20.106')] +[2024-10-02 03:28:26,169][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000695_2846720.pth... +[2024-10-02 03:28:26,292][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000646_2646016.pth +[2024-10-02 03:28:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2846720. Throughput: 0: 218.4. Samples: 713254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:27,735][00821] Avg episode reward: [(0, '20.126')] +[2024-10-02 03:28:32,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 2850816. Throughput: 0: 205.9. Samples: 714252. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:28:32,735][00821] Avg episode reward: [(0, '19.895')] +[2024-10-02 03:28:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2854912. Throughput: 0: 207.6. Samples: 715384. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:28:37,734][00821] Avg episode reward: [(0, '20.105')] +[2024-10-02 03:28:42,729][00821] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2859008. Throughput: 0: 212.2. Samples: 716258. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:28:42,733][00821] Avg episode reward: [(0, '19.759')] +[2024-10-02 03:28:47,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 2863104. Throughput: 0: 207.9. Samples: 717436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:28:47,739][00821] Avg episode reward: [(0, '20.977')] +[2024-10-02 03:28:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 2867200. Throughput: 0: 193.1. Samples: 718088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:52,731][04739] Updated weights for policy 0, policy_version 700 (0.0543) +[2024-10-02 03:28:52,736][00821] Avg episode reward: [(0, '21.472')] +[2024-10-02 03:28:55,254][04725] Signal inference workers to stop experience collection... (700 times) +[2024-10-02 03:28:55,311][04739] InferenceWorker_p0-w0: stopping experience collection (700 times) +[2024-10-02 03:28:56,368][04725] Signal inference workers to resume experience collection... (700 times) +[2024-10-02 03:28:56,370][04739] InferenceWorker_p0-w0: resuming experience collection (700 times) +[2024-10-02 03:28:57,729][00821] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2871296. Throughput: 0: 206.3. Samples: 719392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:28:57,740][00821] Avg episode reward: [(0, '21.971')] +[2024-10-02 03:29:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2875392. Throughput: 0: 215.6. Samples: 720778. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:02,735][00821] Avg episode reward: [(0, '22.438')] +[2024-10-02 03:29:05,777][04725] Saving new best policy, reward=22.438! +[2024-10-02 03:29:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2879488. Throughput: 0: 209.2. Samples: 721756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:07,733][00821] Avg episode reward: [(0, '22.438')] +[2024-10-02 03:29:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2883584. Throughput: 0: 203.3. Samples: 722402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:12,745][00821] Avg episode reward: [(0, '22.397')] +[2024-10-02 03:29:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2887680. Throughput: 0: 208.1. Samples: 723614. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:17,736][00821] Avg episode reward: [(0, '22.273')] +[2024-10-02 03:29:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2891776. Throughput: 0: 216.2. Samples: 725112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:22,732][00821] Avg episode reward: [(0, '22.514')] +[2024-10-02 03:29:25,904][04725] Saving new best policy, reward=22.514! +[2024-10-02 03:29:27,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2895872. Throughput: 0: 205.7. Samples: 725516. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:27,737][00821] Avg episode reward: [(0, '22.136')] +[2024-10-02 03:29:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2899968. Throughput: 0: 207.0. Samples: 726748. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:32,732][00821] Avg episode reward: [(0, '22.544')] +[2024-10-02 03:29:34,988][04725] Saving new best policy, reward=22.544! +[2024-10-02 03:29:37,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2904064. Throughput: 0: 231.9. Samples: 728524. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:37,739][00821] Avg episode reward: [(0, '22.620')] +[2024-10-02 03:29:39,427][04739] Updated weights for policy 0, policy_version 710 (0.0993) +[2024-10-02 03:29:42,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2908160. Throughput: 0: 209.5. Samples: 728818. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:42,733][00821] Avg episode reward: [(0, '22.730')] +[2024-10-02 03:29:45,616][04725] Saving new best policy, reward=22.620! +[2024-10-02 03:29:45,813][04725] Saving new best policy, reward=22.730! +[2024-10-02 03:29:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 2912256. Throughput: 0: 199.8. Samples: 729768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:47,733][00821] Avg episode reward: [(0, '22.848')] +[2024-10-02 03:29:49,989][04725] Saving new best policy, reward=22.848! +[2024-10-02 03:29:52,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2916352. Throughput: 0: 214.8. Samples: 731422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:52,733][00821] Avg episode reward: [(0, '22.071')] +[2024-10-02 03:29:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2920448. Throughput: 0: 214.7. Samples: 732062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:29:57,732][00821] Avg episode reward: [(0, '22.864')] +[2024-10-02 03:30:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2924544. Throughput: 0: 209.8. Samples: 733056. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:30:02,733][00821] Avg episode reward: [(0, '23.121')] +[2024-10-02 03:30:05,517][04725] Saving new best policy, reward=22.864! +[2024-10-02 03:30:05,654][04725] Saving new best policy, reward=23.121! +[2024-10-02 03:30:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2928640. Throughput: 0: 207.2. Samples: 734436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:30:07,736][00821] Avg episode reward: [(0, '23.821')] +[2024-10-02 03:30:09,741][04725] Saving new best policy, reward=23.821! +[2024-10-02 03:30:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2932736. Throughput: 0: 209.8. Samples: 734956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:30:12,739][00821] Avg episode reward: [(0, '23.364')] +[2024-10-02 03:30:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2936832. Throughput: 0: 215.2. Samples: 736432. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:30:17,736][00821] Avg episode reward: [(0, '23.127')] +[2024-10-02 03:30:22,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2940928. Throughput: 0: 196.3. Samples: 737358. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:22,737][00821] Avg episode reward: [(0, '23.249')] +[2024-10-02 03:30:24,820][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000719_2945024.pth... +[2024-10-02 03:30:24,935][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000670_2744320.pth +[2024-10-02 03:30:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2945024. Throughput: 0: 203.8. Samples: 737988. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:27,735][00821] Avg episode reward: [(0, '23.149')] +[2024-10-02 03:30:28,919][04739] Updated weights for policy 0, policy_version 720 (0.0061) +[2024-10-02 03:30:32,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2949120. Throughput: 0: 221.8. Samples: 739748. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:32,734][00821] Avg episode reward: [(0, '23.198')] +[2024-10-02 03:30:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2953216. Throughput: 0: 208.5. Samples: 740804. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:37,736][00821] Avg episode reward: [(0, '23.251')] +[2024-10-02 03:30:42,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2957312. Throughput: 0: 199.7. Samples: 741048. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:42,740][00821] Avg episode reward: [(0, '23.074')] +[2024-10-02 03:30:47,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2961408. Throughput: 0: 216.7. Samples: 742806. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:47,733][00821] Avg episode reward: [(0, '22.885')] +[2024-10-02 03:30:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2969600. Throughput: 0: 211.2. Samples: 743940. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:52,736][00821] Avg episode reward: [(0, '22.604')] +[2024-10-02 03:30:57,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2969600. Throughput: 0: 212.6. Samples: 744522. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:30:57,732][00821] Avg episode reward: [(0, '22.412')] +[2024-10-02 03:31:02,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2973696. Throughput: 0: 207.5. Samples: 745768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:02,740][00821] Avg episode reward: [(0, '22.181')] +[2024-10-02 03:31:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2981888. Throughput: 0: 214.7. Samples: 747020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:07,732][00821] Avg episode reward: [(0, '22.695')] +[2024-10-02 03:31:12,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2985984. Throughput: 0: 222.9. Samples: 748020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:12,738][00821] Avg episode reward: [(0, '23.160')] +[2024-10-02 03:31:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2985984. Throughput: 0: 202.5. Samples: 748860. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:17,731][00821] Avg episode reward: [(0, '23.385')] +[2024-10-02 03:31:18,559][04739] Updated weights for policy 0, policy_version 730 (0.0531) +[2024-10-02 03:31:22,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2990080. Throughput: 0: 206.6. Samples: 750100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:22,733][00821] Avg episode reward: [(0, '23.058')] +[2024-10-02 03:31:27,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 2998272. Throughput: 0: 222.3. Samples: 751052. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:27,732][00821] Avg episode reward: [(0, '22.893')] +[2024-10-02 03:31:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 2998272. Throughput: 0: 204.5. Samples: 752006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:32,732][00821] Avg episode reward: [(0, '23.063')] +[2024-10-02 03:31:37,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3002368. Throughput: 0: 204.9. Samples: 753162. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:31:37,732][00821] Avg episode reward: [(0, '22.863')] +[2024-10-02 03:31:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3010560. Throughput: 0: 206.6. Samples: 753818. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:31:42,731][00821] Avg episode reward: [(0, '23.115')] +[2024-10-02 03:31:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3014656. Throughput: 0: 209.6. Samples: 755200. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:31:47,731][00821] Avg episode reward: [(0, '22.855')] +[2024-10-02 03:31:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 3014656. Throughput: 0: 204.5. Samples: 756224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:31:52,736][00821] Avg episode reward: [(0, '22.986')] +[2024-10-02 03:31:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3018752. Throughput: 0: 195.9. Samples: 756834. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:31:57,732][00821] Avg episode reward: [(0, '23.254')] +[2024-10-02 03:32:02,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3026944. Throughput: 0: 208.8. Samples: 758254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:32:02,736][00821] Avg episode reward: [(0, '23.273')] +[2024-10-02 03:32:06,646][04739] Updated weights for policy 0, policy_version 740 (0.1508) +[2024-10-02 03:32:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3031040. Throughput: 0: 208.1. Samples: 759464. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:32:07,739][00821] Avg episode reward: [(0, '22.852')] +[2024-10-02 03:32:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 3031040. Throughput: 0: 200.8. Samples: 760090. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:32:12,732][00821] Avg episode reward: [(0, '23.008')] +[2024-10-02 03:32:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3039232. Throughput: 0: 207.3. Samples: 761334. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:17,735][00821] Avg episode reward: [(0, '23.652')] +[2024-10-02 03:32:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3043328. Throughput: 0: 213.4. Samples: 762766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:22,737][00821] Avg episode reward: [(0, '23.465')] +[2024-10-02 03:32:26,656][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000744_3047424.pth... +[2024-10-02 03:32:26,787][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000695_2846720.pth +[2024-10-02 03:32:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3047424. Throughput: 0: 211.8. Samples: 763348. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:32:27,735][00821] Avg episode reward: [(0, '23.465')] +[2024-10-02 03:32:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3051520. Throughput: 0: 204.2. Samples: 764388. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:32:32,741][00821] Avg episode reward: [(0, '23.709')] +[2024-10-02 03:32:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3055616. Throughput: 0: 210.6. Samples: 765700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:37,735][00821] Avg episode reward: [(0, '23.758')] +[2024-10-02 03:32:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3059712. Throughput: 0: 213.6. Samples: 766446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:42,737][00821] Avg episode reward: [(0, '24.523')] +[2024-10-02 03:32:46,565][04725] Saving new best policy, reward=24.523! +[2024-10-02 03:32:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3063808. Throughput: 0: 204.8. Samples: 767472. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:47,736][00821] Avg episode reward: [(0, '23.982')] +[2024-10-02 03:32:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3067904. Throughput: 0: 206.0. Samples: 768736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:52,737][00821] Avg episode reward: [(0, '24.081')] +[2024-10-02 03:32:56,050][04739] Updated weights for policy 0, policy_version 750 (0.0043) +[2024-10-02 03:32:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3072000. Throughput: 0: 209.6. Samples: 769520. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:32:57,732][00821] Avg episode reward: [(0, '23.818')] +[2024-10-02 03:32:58,493][04725] Signal inference workers to stop experience collection... (750 times) +[2024-10-02 03:32:58,566][04739] InferenceWorker_p0-w0: stopping experience collection (750 times) +[2024-10-02 03:33:00,344][04725] Signal inference workers to resume experience collection... (750 times) +[2024-10-02 03:33:00,345][04739] InferenceWorker_p0-w0: resuming experience collection (750 times) +[2024-10-02 03:33:02,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3076096. Throughput: 0: 209.6. Samples: 770766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:33:02,733][00821] Avg episode reward: [(0, '23.873')] +[2024-10-02 03:33:07,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3080192. Throughput: 0: 196.6. Samples: 771612. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:33:07,735][00821] Avg episode reward: [(0, '23.970')] +[2024-10-02 03:33:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3080192. Throughput: 0: 197.6. Samples: 772238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:33:12,732][00821] Avg episode reward: [(0, '24.138')] +[2024-10-02 03:33:17,729][00821] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3084288. Throughput: 0: 190.2. Samples: 772946. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:33:17,742][00821] Avg episode reward: [(0, '23.756')] +[2024-10-02 03:33:22,730][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3088384. Throughput: 0: 178.6. Samples: 773738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:33:22,733][00821] Avg episode reward: [(0, '23.674')] +[2024-10-02 03:33:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 805.3). Total num frames: 3088384. Throughput: 0: 176.8. Samples: 774402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:33:27,738][00821] Avg episode reward: [(0, '23.563')] +[2024-10-02 03:33:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3096576. Throughput: 0: 182.1. Samples: 775668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:32,744][00821] Avg episode reward: [(0, '23.495')] +[2024-10-02 03:33:37,733][00821] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3100672. Throughput: 0: 184.7. Samples: 777046. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:37,745][00821] Avg episode reward: [(0, '22.708')] +[2024-10-02 03:33:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3104768. Throughput: 0: 183.0. Samples: 777754. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:42,738][00821] Avg episode reward: [(0, '22.394')] +[2024-10-02 03:33:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3108864. Throughput: 0: 178.5. Samples: 778798. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:47,733][00821] Avg episode reward: [(0, '22.234')] +[2024-10-02 03:33:52,330][04739] Updated weights for policy 0, policy_version 760 (0.1128) +[2024-10-02 03:33:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3112960. Throughput: 0: 185.2. Samples: 779948. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:52,732][00821] Avg episode reward: [(0, '22.028')] +[2024-10-02 03:33:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3117056. Throughput: 0: 193.5. Samples: 780946. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:33:57,733][00821] Avg episode reward: [(0, '22.409')] +[2024-10-02 03:34:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3121152. Throughput: 0: 199.7. Samples: 781934. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:02,732][00821] Avg episode reward: [(0, '21.990')] +[2024-10-02 03:34:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3125248. Throughput: 0: 192.7. Samples: 782410. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:07,732][00821] Avg episode reward: [(0, '22.147')] +[2024-10-02 03:34:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3129344. Throughput: 0: 213.0. Samples: 783988. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:12,735][00821] Avg episode reward: [(0, '22.354')] +[2024-10-02 03:34:17,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 3133440. Throughput: 0: 208.9. Samples: 785068. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:17,738][00821] Avg episode reward: [(0, '22.170')] +[2024-10-02 03:34:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3137536. Throughput: 0: 201.0. Samples: 786090. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:22,737][00821] Avg episode reward: [(0, '22.058')] +[2024-10-02 03:34:26,788][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000767_3141632.pth... +[2024-10-02 03:34:26,904][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000719_2945024.pth +[2024-10-02 03:34:27,729][00821] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 3141632. Throughput: 0: 207.8. Samples: 787104. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:27,732][00821] Avg episode reward: [(0, '21.798')] +[2024-10-02 03:34:32,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 3145728. Throughput: 0: 210.4. Samples: 788268. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:34:32,738][00821] Avg episode reward: [(0, '22.454')] +[2024-10-02 03:34:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3149824. Throughput: 0: 207.5. Samples: 789284. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:34:37,734][00821] Avg episode reward: [(0, '22.299')] +[2024-10-02 03:34:42,515][04739] Updated weights for policy 0, policy_version 770 (0.1021) +[2024-10-02 03:34:42,729][00821] Fps is (10 sec: 819.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3153920. Throughput: 0: 202.2. Samples: 790044. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:42,732][00821] Avg episode reward: [(0, '21.896')] +[2024-10-02 03:34:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3158016. Throughput: 0: 208.5. Samples: 791316. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:34:47,732][00821] Avg episode reward: [(0, '21.908')] +[2024-10-02 03:34:52,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 3162112. Throughput: 0: 224.9. Samples: 792530. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:34:52,740][00821] Avg episode reward: [(0, '22.199')] +[2024-10-02 03:34:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3166208. Throughput: 0: 204.1. Samples: 793174. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:34:57,740][00821] Avg episode reward: [(0, '21.889')] +[2024-10-02 03:35:02,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3170304. Throughput: 0: 205.4. Samples: 794308. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:02,732][00821] Avg episode reward: [(0, '21.547')] +[2024-10-02 03:35:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3174400. Throughput: 0: 217.9. Samples: 795894. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:07,737][00821] Avg episode reward: [(0, '21.094')] +[2024-10-02 03:35:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3178496. Throughput: 0: 205.7. Samples: 796362. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:12,737][00821] Avg episode reward: [(0, '20.926')] +[2024-10-02 03:35:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 3182592. Throughput: 0: 201.7. Samples: 797342. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:17,738][00821] Avg episode reward: [(0, '20.611')] +[2024-10-02 03:35:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3186688. Throughput: 0: 213.2. Samples: 798876. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 03:35:22,735][00821] Avg episode reward: [(0, '20.462')] +[2024-10-02 03:35:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3190784. Throughput: 0: 212.4. Samples: 799604. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-10-02 03:35:27,737][00821] Avg episode reward: [(0, '20.553')] +[2024-10-02 03:35:31,297][04739] Updated weights for policy 0, policy_version 780 (0.3251) +[2024-10-02 03:35:32,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 3194880. Throughput: 0: 203.2. Samples: 800460. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:32,746][00821] Avg episode reward: [(0, '20.479')] +[2024-10-02 03:35:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3198976. Throughput: 0: 210.1. Samples: 801984. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:37,737][00821] Avg episode reward: [(0, '20.878')] +[2024-10-02 03:35:42,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3203072. Throughput: 0: 208.2. Samples: 802544. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:42,735][00821] Avg episode reward: [(0, '21.741')] +[2024-10-02 03:35:47,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3207168. Throughput: 0: 214.0. Samples: 803938. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:47,738][00821] Avg episode reward: [(0, '22.148')] +[2024-10-02 03:35:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 3211264. Throughput: 0: 200.6. Samples: 804922. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:52,732][00821] Avg episode reward: [(0, '22.078')] +[2024-10-02 03:35:57,730][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3215360. Throughput: 0: 203.5. Samples: 805520. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:35:57,734][00821] Avg episode reward: [(0, '23.484')] +[2024-10-02 03:36:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3219456. Throughput: 0: 218.2. Samples: 807160. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:36:02,741][00821] Avg episode reward: [(0, '23.809')] +[2024-10-02 03:36:07,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3223552. Throughput: 0: 208.2. Samples: 808244. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:36:07,738][00821] Avg episode reward: [(0, '23.761')] +[2024-10-02 03:36:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3227648. Throughput: 0: 197.9. Samples: 808508. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:12,739][00821] Avg episode reward: [(0, '23.166')] +[2024-10-02 03:36:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3231744. Throughput: 0: 215.8. Samples: 810172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:17,740][00821] Avg episode reward: [(0, '22.879')] +[2024-10-02 03:36:18,891][04739] Updated weights for policy 0, policy_version 790 (0.0538) +[2024-10-02 03:36:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3235840. Throughput: 0: 212.0. Samples: 811524. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:22,732][00821] Avg episode reward: [(0, '22.905')] +[2024-10-02 03:36:27,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3239936. Throughput: 0: 205.8. Samples: 811804. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:27,740][00821] Avg episode reward: [(0, '23.190')] +[2024-10-02 03:36:30,076][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000792_3244032.pth... +[2024-10-02 03:36:30,178][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000744_3047424.pth +[2024-10-02 03:36:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3244032. Throughput: 0: 202.7. Samples: 813058. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:32,736][00821] Avg episode reward: [(0, '23.686')] +[2024-10-02 03:36:37,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3248128. Throughput: 0: 215.6. Samples: 814626. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:36:37,739][00821] Avg episode reward: [(0, '22.998')] +[2024-10-02 03:36:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3252224. Throughput: 0: 211.4. Samples: 815034. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:36:42,736][00821] Avg episode reward: [(0, '23.555')] +[2024-10-02 03:36:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3256320. Throughput: 0: 197.9. Samples: 816064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:36:47,741][00821] Avg episode reward: [(0, '23.192')] +[2024-10-02 03:36:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3260416. Throughput: 0: 207.2. Samples: 817570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:36:52,732][00821] Avg episode reward: [(0, '23.993')] +[2024-10-02 03:36:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3264512. Throughput: 0: 215.8. Samples: 818220. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:36:57,732][00821] Avg episode reward: [(0, '23.712')] +[2024-10-02 03:37:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3268608. Throughput: 0: 206.2. Samples: 819452. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:02,735][00821] Avg episode reward: [(0, '23.226')] +[2024-10-02 03:37:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3272704. Throughput: 0: 205.2. Samples: 820758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:07,740][00821] Avg episode reward: [(0, '23.675')] +[2024-10-02 03:37:08,837][04739] Updated weights for policy 0, policy_version 800 (0.1476) +[2024-10-02 03:37:11,331][04725] Signal inference workers to stop experience collection... (800 times) +[2024-10-02 03:37:11,406][04739] InferenceWorker_p0-w0: stopping experience collection (800 times) +[2024-10-02 03:37:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3276800. Throughput: 0: 212.9. Samples: 821386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:12,732][00821] Avg episode reward: [(0, '23.675')] +[2024-10-02 03:37:12,860][04725] Signal inference workers to resume experience collection... (800 times) +[2024-10-02 03:37:12,860][04739] InferenceWorker_p0-w0: resuming experience collection (800 times) +[2024-10-02 03:37:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3280896. Throughput: 0: 216.7. Samples: 822808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:17,733][00821] Avg episode reward: [(0, '24.753')] +[2024-10-02 03:37:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3284992. Throughput: 0: 201.2. Samples: 823682. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:22,746][00821] Avg episode reward: [(0, '24.184')] +[2024-10-02 03:37:23,779][04725] Saving new best policy, reward=24.753! +[2024-10-02 03:37:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3289088. Throughput: 0: 210.2. Samples: 824492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:27,741][00821] Avg episode reward: [(0, '24.126')] +[2024-10-02 03:37:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 3297280. Throughput: 0: 217.9. Samples: 825868. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:37:32,735][00821] Avg episode reward: [(0, '24.361')] +[2024-10-02 03:37:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3297280. Throughput: 0: 207.6. Samples: 826914. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:37:37,733][00821] Avg episode reward: [(0, '23.976')] +[2024-10-02 03:37:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3301376. Throughput: 0: 205.1. Samples: 827448. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:37:42,735][00821] Avg episode reward: [(0, '24.501')] +[2024-10-02 03:37:47,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 3309568. Throughput: 0: 212.7. Samples: 829024. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:47,736][00821] Avg episode reward: [(0, '24.503')] +[2024-10-02 03:37:52,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 3313664. Throughput: 0: 208.9. Samples: 830158. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:52,734][00821] Avg episode reward: [(0, '24.444')] +[2024-10-02 03:37:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 3313664. Throughput: 0: 209.2. Samples: 830802. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:37:57,733][00821] Avg episode reward: [(0, '23.895')] +[2024-10-02 03:37:57,798][04739] Updated weights for policy 0, policy_version 810 (0.1820) +[2024-10-02 03:38:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 3321856. Throughput: 0: 207.2. Samples: 832130. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:38:02,740][00821] Avg episode reward: [(0, '22.828')] +[2024-10-02 03:38:07,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3325952. Throughput: 0: 212.1. Samples: 833226. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:38:07,735][00821] Avg episode reward: [(0, '22.434')] +[2024-10-02 03:38:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3330048. Throughput: 0: 215.1. Samples: 834172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:38:12,736][00821] Avg episode reward: [(0, '22.881')] +[2024-10-02 03:38:17,730][00821] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3334144. Throughput: 0: 206.6. Samples: 835166. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:38:17,737][00821] Avg episode reward: [(0, '22.881')] +[2024-10-02 03:38:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3338240. Throughput: 0: 206.4. Samples: 836200. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-10-02 03:38:22,738][00821] Avg episode reward: [(0, '24.172')] +[2024-10-02 03:38:26,046][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000816_3342336.pth... +[2024-10-02 03:38:26,151][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000767_3141632.pth +[2024-10-02 03:38:27,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3342336. Throughput: 0: 216.3. Samples: 837182. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:38:27,732][00821] Avg episode reward: [(0, '23.915')] +[2024-10-02 03:38:32,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 3346432. Throughput: 0: 203.0. Samples: 838158. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-10-02 03:38:32,738][00821] Avg episode reward: [(0, '24.125')] +[2024-10-02 03:38:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3350528. Throughput: 0: 201.2. Samples: 839212. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:38:37,734][00821] Avg episode reward: [(0, '24.416')] +[2024-10-02 03:38:42,729][00821] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3354624. Throughput: 0: 207.5. Samples: 840138. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:38:42,732][00821] Avg episode reward: [(0, '23.689')] +[2024-10-02 03:38:45,477][04739] Updated weights for policy 0, policy_version 820 (0.1026) +[2024-10-02 03:38:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3358720. Throughput: 0: 208.0. Samples: 841490. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:38:47,735][00821] Avg episode reward: [(0, '23.669')] +[2024-10-02 03:38:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3362816. Throughput: 0: 202.8. Samples: 842350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:38:52,732][00821] Avg episode reward: [(0, '23.670')] +[2024-10-02 03:38:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3366912. Throughput: 0: 198.5. Samples: 843104. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:38:57,735][00821] Avg episode reward: [(0, '22.850')] +[2024-10-02 03:39:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3371008. Throughput: 0: 200.1. Samples: 844172. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:39:02,732][00821] Avg episode reward: [(0, '22.886')] +[2024-10-02 03:39:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3375104. Throughput: 0: 209.3. Samples: 845620. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:07,733][00821] Avg episode reward: [(0, '22.853')] +[2024-10-02 03:39:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3379200. Throughput: 0: 198.0. Samples: 846094. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:12,733][00821] Avg episode reward: [(0, '22.831')] +[2024-10-02 03:39:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3383296. Throughput: 0: 204.0. Samples: 847336. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:17,732][00821] Avg episode reward: [(0, '23.128')] +[2024-10-02 03:39:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3387392. Throughput: 0: 220.4. Samples: 849132. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:22,734][00821] Avg episode reward: [(0, '22.541')] +[2024-10-02 03:39:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3391488. Throughput: 0: 203.1. Samples: 849276. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:27,732][00821] Avg episode reward: [(0, '22.826')] +[2024-10-02 03:39:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 3395584. Throughput: 0: 195.6. Samples: 850290. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:32,737][00821] Avg episode reward: [(0, '23.588')] +[2024-10-02 03:39:35,757][04739] Updated weights for policy 0, policy_version 830 (0.1508) +[2024-10-02 03:39:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3399680. Throughput: 0: 212.3. Samples: 851904. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:37,732][00821] Avg episode reward: [(0, '23.519')] +[2024-10-02 03:39:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3403776. Throughput: 0: 212.0. Samples: 852646. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:42,737][00821] Avg episode reward: [(0, '22.645')] +[2024-10-02 03:39:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3407872. Throughput: 0: 207.9. Samples: 853528. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:47,732][00821] Avg episode reward: [(0, '22.126')] +[2024-10-02 03:39:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3411968. Throughput: 0: 209.9. Samples: 855066. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:52,740][00821] Avg episode reward: [(0, '22.683')] +[2024-10-02 03:39:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3416064. Throughput: 0: 212.4. Samples: 855654. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:39:57,741][00821] Avg episode reward: [(0, '22.678')] +[2024-10-02 03:40:02,731][00821] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3420160. Throughput: 0: 212.5. Samples: 856900. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:40:02,736][00821] Avg episode reward: [(0, '22.606')] +[2024-10-02 03:40:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3424256. Throughput: 0: 195.2. Samples: 857918. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:07,732][00821] Avg episode reward: [(0, '21.839')] +[2024-10-02 03:40:12,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3428352. Throughput: 0: 206.8. Samples: 858582. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:12,731][00821] Avg episode reward: [(0, '22.121')] +[2024-10-02 03:40:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3432448. Throughput: 0: 220.0. Samples: 860188. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:17,732][00821] Avg episode reward: [(0, '22.528')] +[2024-10-02 03:40:22,732][00821] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3436544. Throughput: 0: 205.8. Samples: 861164. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:22,736][00821] Avg episode reward: [(0, '22.602')] +[2024-10-02 03:40:25,552][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000840_3440640.pth... +[2024-10-02 03:40:25,565][04739] Updated weights for policy 0, policy_version 840 (0.1205) +[2024-10-02 03:40:25,669][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000792_3244032.pth +[2024-10-02 03:40:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3440640. Throughput: 0: 200.5. Samples: 861670. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:27,732][00821] Avg episode reward: [(0, '22.314')] +[2024-10-02 03:40:32,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3444736. Throughput: 0: 212.7. Samples: 863100. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:40:32,732][00821] Avg episode reward: [(0, '22.141')] +[2024-10-02 03:40:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3448832. Throughput: 0: 209.3. Samples: 864484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:40:37,734][00821] Avg episode reward: [(0, '22.010')] +[2024-10-02 03:40:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3452928. Throughput: 0: 201.7. Samples: 864732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:40:42,735][00821] Avg episode reward: [(0, '22.533')] +[2024-10-02 03:40:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3457024. Throughput: 0: 207.7. Samples: 866244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:40:47,732][00821] Avg episode reward: [(0, '23.135')] +[2024-10-02 03:40:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3461120. Throughput: 0: 218.6. Samples: 867754. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:40:52,741][00821] Avg episode reward: [(0, '22.840')] +[2024-10-02 03:40:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3465216. Throughput: 0: 213.1. Samples: 868170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:40:57,734][00821] Avg episode reward: [(0, '23.365')] +[2024-10-02 03:41:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3469312. Throughput: 0: 197.8. Samples: 869090. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:41:02,737][00821] Avg episode reward: [(0, '23.455')] +[2024-10-02 03:41:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3473408. Throughput: 0: 212.0. Samples: 870702. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:41:07,738][00821] Avg episode reward: [(0, '24.021')] +[2024-10-02 03:41:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3477504. Throughput: 0: 217.5. Samples: 871458. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:41:12,732][00821] Avg episode reward: [(0, '24.286')] +[2024-10-02 03:41:13,093][04739] Updated weights for policy 0, policy_version 850 (0.2164) +[2024-10-02 03:41:17,526][04725] Signal inference workers to stop experience collection... (850 times) +[2024-10-02 03:41:17,585][04739] InferenceWorker_p0-w0: stopping experience collection (850 times) +[2024-10-02 03:41:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3481600. Throughput: 0: 209.2. Samples: 872516. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:17,732][00821] Avg episode reward: [(0, '24.901')] +[2024-10-02 03:41:19,344][04725] Signal inference workers to resume experience collection... (850 times) +[2024-10-02 03:41:19,345][04739] InferenceWorker_p0-w0: resuming experience collection (850 times) +[2024-10-02 03:41:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3485696. Throughput: 0: 205.7. Samples: 873742. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:22,736][00821] Avg episode reward: [(0, '24.975')] +[2024-10-02 03:41:23,713][04725] Saving new best policy, reward=24.901! +[2024-10-02 03:41:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3489792. Throughput: 0: 217.3. Samples: 874512. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:27,739][00821] Avg episode reward: [(0, '25.065')] +[2024-10-02 03:41:27,941][04725] Saving new best policy, reward=24.975! +[2024-10-02 03:41:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3493888. Throughput: 0: 214.1. Samples: 875878. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:32,732][00821] Avg episode reward: [(0, '25.460')] +[2024-10-02 03:41:33,622][04725] Saving new best policy, reward=25.065! +[2024-10-02 03:41:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3497984. Throughput: 0: 198.3. Samples: 876678. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:41:37,732][00821] Avg episode reward: [(0, '25.379')] +[2024-10-02 03:41:39,438][04725] Saving new best policy, reward=25.460! +[2024-10-02 03:41:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3502080. Throughput: 0: 202.9. Samples: 877302. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:41:42,741][00821] Avg episode reward: [(0, '25.837')] +[2024-10-02 03:41:47,700][04725] Saving new best policy, reward=25.837! +[2024-10-02 03:41:47,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3510272. Throughput: 0: 216.7. Samples: 878840. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:47,739][00821] Avg episode reward: [(0, '25.465')] +[2024-10-02 03:41:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3510272. Throughput: 0: 206.5. Samples: 879994. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-10-02 03:41:52,732][00821] Avg episode reward: [(0, '25.615')] +[2024-10-02 03:41:57,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3514368. Throughput: 0: 196.3. Samples: 880292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:41:57,741][00821] Avg episode reward: [(0, '26.487')] +[2024-10-02 03:42:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3518464. Throughput: 0: 203.7. Samples: 881682. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:02,732][00821] Avg episode reward: [(0, '26.328')] +[2024-10-02 03:42:03,345][04725] Saving new best policy, reward=26.487! +[2024-10-02 03:42:03,360][04739] Updated weights for policy 0, policy_version 860 (0.1592) +[2024-10-02 03:42:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3522560. Throughput: 0: 208.4. Samples: 883118. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:07,732][00821] Avg episode reward: [(0, '26.233')] +[2024-10-02 03:42:12,733][00821] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 3526656. Throughput: 0: 202.2. Samples: 883614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:12,738][00821] Avg episode reward: [(0, '27.174')] +[2024-10-02 03:42:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3530752. Throughput: 0: 199.6. Samples: 884862. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:17,741][00821] Avg episode reward: [(0, '27.287')] +[2024-10-02 03:42:18,491][04725] Saving new best policy, reward=27.174! +[2024-10-02 03:42:22,714][04725] Saving new best policy, reward=27.287! +[2024-10-02 03:42:22,729][00821] Fps is (10 sec: 1229.3, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3538944. Throughput: 0: 211.4. Samples: 886192. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:22,734][00821] Avg episode reward: [(0, '26.834')] +[2024-10-02 03:42:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3538944. Throughput: 0: 218.0. Samples: 887110. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:27,732][00821] Avg episode reward: [(0, '26.978')] +[2024-10-02 03:42:28,006][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth... +[2024-10-02 03:42:28,164][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000816_3342336.pth +[2024-10-02 03:42:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3543040. Throughput: 0: 201.6. Samples: 887910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:32,734][00821] Avg episode reward: [(0, '26.836')] +[2024-10-02 03:42:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3547136. Throughput: 0: 205.6. Samples: 889246. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:37,732][00821] Avg episode reward: [(0, '26.182')] +[2024-10-02 03:42:42,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3555328. Throughput: 0: 219.4. Samples: 890164. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:42,734][00821] Avg episode reward: [(0, '26.233')] +[2024-10-02 03:42:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 3555328. Throughput: 0: 212.8. Samples: 891258. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:47,733][00821] Avg episode reward: [(0, '26.137')] +[2024-10-02 03:42:52,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3559424. Throughput: 0: 204.9. Samples: 892340. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:52,738][00821] Avg episode reward: [(0, '26.264')] +[2024-10-02 03:42:53,934][04739] Updated weights for policy 0, policy_version 870 (0.1627) +[2024-10-02 03:42:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3567616. Throughput: 0: 209.9. Samples: 893058. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:42:57,736][00821] Avg episode reward: [(0, '26.280')] +[2024-10-02 03:43:02,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3571712. Throughput: 0: 213.2. Samples: 894456. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:02,737][00821] Avg episode reward: [(0, '25.077')] +[2024-10-02 03:43:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3571712. Throughput: 0: 206.1. Samples: 895468. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:07,732][00821] Avg episode reward: [(0, '25.014')] +[2024-10-02 03:43:12,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 819.2). Total num frames: 3575808. Throughput: 0: 199.8. Samples: 896102. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:12,739][00821] Avg episode reward: [(0, '24.882')] +[2024-10-02 03:43:17,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3584000. Throughput: 0: 214.3. Samples: 897554. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:17,732][00821] Avg episode reward: [(0, '25.056')] +[2024-10-02 03:43:22,729][00821] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3588096. Throughput: 0: 209.4. Samples: 898670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:43:22,738][00821] Avg episode reward: [(0, '24.639')] +[2024-10-02 03:43:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3588096. Throughput: 0: 203.4. Samples: 899316. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:43:27,732][00821] Avg episode reward: [(0, '25.745')] +[2024-10-02 03:43:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3596288. Throughput: 0: 206.9. Samples: 900570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:43:32,742][00821] Avg episode reward: [(0, '24.698')] +[2024-10-02 03:43:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3600384. Throughput: 0: 214.0. Samples: 901972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:43:37,732][00821] Avg episode reward: [(0, '24.401')] +[2024-10-02 03:43:42,083][04739] Updated weights for policy 0, policy_version 880 (0.1501) +[2024-10-02 03:43:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3604480. Throughput: 0: 213.2. Samples: 902652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:42,736][00821] Avg episode reward: [(0, '24.148')] +[2024-10-02 03:43:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3608576. Throughput: 0: 204.0. Samples: 903638. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:47,740][00821] Avg episode reward: [(0, '24.255')] +[2024-10-02 03:43:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3612672. Throughput: 0: 210.2. Samples: 904928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:52,732][00821] Avg episode reward: [(0, '24.009')] +[2024-10-02 03:43:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3616768. Throughput: 0: 213.5. Samples: 905708. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:43:57,738][00821] Avg episode reward: [(0, '24.102')] +[2024-10-02 03:44:02,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 3620864. Throughput: 0: 204.5. Samples: 906758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:44:02,738][00821] Avg episode reward: [(0, '23.940')] +[2024-10-02 03:44:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3624960. Throughput: 0: 189.9. Samples: 907216. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:44:07,743][00821] Avg episode reward: [(0, '24.111')] +[2024-10-02 03:44:12,731][00821] Fps is (10 sec: 819.5, 60 sec: 887.4, 300 sec: 833.1). Total num frames: 3629056. Throughput: 0: 210.6. Samples: 908792. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:12,737][00821] Avg episode reward: [(0, '24.263')] +[2024-10-02 03:44:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3633152. Throughput: 0: 208.4. Samples: 909948. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:17,738][00821] Avg episode reward: [(0, '23.367')] +[2024-10-02 03:44:22,729][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3637248. Throughput: 0: 198.8. Samples: 910916. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:22,738][00821] Avg episode reward: [(0, '23.288')] +[2024-10-02 03:44:26,610][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000889_3641344.pth... +[2024-10-02 03:44:26,729][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000840_3440640.pth +[2024-10-02 03:44:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3641344. Throughput: 0: 205.6. Samples: 911904. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:27,740][00821] Avg episode reward: [(0, '23.795')] +[2024-10-02 03:44:31,212][04739] Updated weights for policy 0, policy_version 890 (0.2128) +[2024-10-02 03:44:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3645440. Throughput: 0: 207.8. Samples: 912990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:32,732][00821] Avg episode reward: [(0, '23.883')] +[2024-10-02 03:44:37,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3649536. Throughput: 0: 207.8. Samples: 914278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:37,735][00821] Avg episode reward: [(0, '24.014')] +[2024-10-02 03:44:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3653632. Throughput: 0: 204.2. Samples: 914898. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:42,738][00821] Avg episode reward: [(0, '24.561')] +[2024-10-02 03:44:47,730][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3657728. Throughput: 0: 207.8. Samples: 916110. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:47,735][00821] Avg episode reward: [(0, '23.875')] +[2024-10-02 03:44:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3661824. Throughput: 0: 230.0. Samples: 917568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:52,735][00821] Avg episode reward: [(0, '23.777')] +[2024-10-02 03:44:57,734][00821] Fps is (10 sec: 818.8, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 3665920. Throughput: 0: 205.8. Samples: 918052. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:44:57,737][00821] Avg episode reward: [(0, '23.777')] +[2024-10-02 03:45:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 3670016. Throughput: 0: 202.7. Samples: 919070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:02,732][00821] Avg episode reward: [(0, '24.088')] +[2024-10-02 03:45:07,729][00821] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3674112. Throughput: 0: 219.8. Samples: 920808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:07,732][00821] Avg episode reward: [(0, '24.089')] +[2024-10-02 03:45:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3678208. Throughput: 0: 205.6. Samples: 921158. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:12,731][00821] Avg episode reward: [(0, '24.153')] +[2024-10-02 03:45:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3682304. Throughput: 0: 206.4. Samples: 922276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:17,736][00821] Avg episode reward: [(0, '24.562')] +[2024-10-02 03:45:20,727][04739] Updated weights for policy 0, policy_version 900 (0.1596) +[2024-10-02 03:45:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3686400. Throughput: 0: 211.5. Samples: 923796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:22,734][00821] Avg episode reward: [(0, '24.487')] +[2024-10-02 03:45:23,139][04725] Signal inference workers to stop experience collection... (900 times) +[2024-10-02 03:45:23,193][04739] InferenceWorker_p0-w0: stopping experience collection (900 times) +[2024-10-02 03:45:24,719][04725] Signal inference workers to resume experience collection... (900 times) +[2024-10-02 03:45:24,721][04739] InferenceWorker_p0-w0: resuming experience collection (900 times) +[2024-10-02 03:45:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3690496. Throughput: 0: 206.7. Samples: 924200. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:27,737][00821] Avg episode reward: [(0, '24.325')] +[2024-10-02 03:45:32,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3694592. Throughput: 0: 211.9. Samples: 925646. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:32,734][00821] Avg episode reward: [(0, '24.727')] +[2024-10-02 03:45:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3698688. Throughput: 0: 204.0. Samples: 926750. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:45:37,732][00821] Avg episode reward: [(0, '24.027')] +[2024-10-02 03:45:42,730][00821] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3702784. Throughput: 0: 207.0. Samples: 927364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:45:42,740][00821] Avg episode reward: [(0, '24.239')] +[2024-10-02 03:45:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3706880. Throughput: 0: 224.7. Samples: 929182. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:45:47,732][00821] Avg episode reward: [(0, '24.054')] +[2024-10-02 03:45:52,732][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3710976. Throughput: 0: 205.3. Samples: 930048. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:45:52,737][00821] Avg episode reward: [(0, '24.263')] +[2024-10-02 03:45:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 833.1). Total num frames: 3715072. Throughput: 0: 203.5. Samples: 930314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:45:57,732][00821] Avg episode reward: [(0, '24.405')] +[2024-10-02 03:46:02,729][00821] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3719168. Throughput: 0: 216.4. Samples: 932016. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:02,732][00821] Avg episode reward: [(0, '24.443')] +[2024-10-02 03:46:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3723264. Throughput: 0: 212.6. Samples: 933362. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:07,733][00821] Avg episode reward: [(0, '25.172')] +[2024-10-02 03:46:08,956][04739] Updated weights for policy 0, policy_version 910 (0.1525) +[2024-10-02 03:46:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3727360. Throughput: 0: 210.5. Samples: 933672. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:12,732][00821] Avg episode reward: [(0, '24.805')] +[2024-10-02 03:46:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3731456. Throughput: 0: 208.4. Samples: 935024. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:17,733][00821] Avg episode reward: [(0, '24.728')] +[2024-10-02 03:46:22,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3739648. Throughput: 0: 215.2. Samples: 936436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:46:22,741][00821] Avg episode reward: [(0, '24.848')] +[2024-10-02 03:46:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3739648. Throughput: 0: 215.7. Samples: 937070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:46:27,731][00821] Avg episode reward: [(0, '25.256')] +[2024-10-02 03:46:29,006][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000914_3743744.pth... +[2024-10-02 03:46:29,156][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth +[2024-10-02 03:46:32,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3743744. Throughput: 0: 198.8. Samples: 938126. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:32,733][00821] Avg episode reward: [(0, '24.962')] +[2024-10-02 03:46:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3747840. Throughput: 0: 210.5. Samples: 939518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-10-02 03:46:37,734][00821] Avg episode reward: [(0, '25.500')] +[2024-10-02 03:46:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 3751936. Throughput: 0: 225.2. Samples: 940446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:46:42,732][00821] Avg episode reward: [(0, '25.273')] +[2024-10-02 03:46:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3756032. Throughput: 0: 206.2. Samples: 941294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:46:47,734][00821] Avg episode reward: [(0, '25.250')] +[2024-10-02 03:46:52,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3760128. Throughput: 0: 205.5. Samples: 942610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:46:52,739][00821] Avg episode reward: [(0, '25.391')] +[2024-10-02 03:46:57,454][04739] Updated weights for policy 0, policy_version 920 (0.1033) +[2024-10-02 03:46:57,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3768320. Throughput: 0: 216.6. Samples: 943420. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:46:57,740][00821] Avg episode reward: [(0, '25.195')] +[2024-10-02 03:47:02,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3768320. Throughput: 0: 214.4. Samples: 944674. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:02,733][00821] Avg episode reward: [(0, '24.899')] +[2024-10-02 03:47:07,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3772416. Throughput: 0: 204.3. Samples: 945628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:07,740][00821] Avg episode reward: [(0, '24.357')] +[2024-10-02 03:47:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3776512. Throughput: 0: 206.6. Samples: 946368. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:47:12,737][00821] Avg episode reward: [(0, '24.628')] +[2024-10-02 03:47:17,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3784704. Throughput: 0: 214.0. Samples: 947756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:47:17,732][00821] Avg episode reward: [(0, '24.346')] +[2024-10-02 03:47:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 3784704. Throughput: 0: 205.3. Samples: 948756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:47:22,731][00821] Avg episode reward: [(0, '23.939')] +[2024-10-02 03:47:27,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3788800. Throughput: 0: 197.0. Samples: 949312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:47:27,732][00821] Avg episode reward: [(0, '24.139')] +[2024-10-02 03:47:32,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3796992. Throughput: 0: 210.4. Samples: 950762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:32,733][00821] Avg episode reward: [(0, '23.627')] +[2024-10-02 03:47:37,729][00821] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3801088. Throughput: 0: 208.9. Samples: 952012. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:37,734][00821] Avg episode reward: [(0, '23.368')] +[2024-10-02 03:47:42,729][00821] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3801088. Throughput: 0: 206.3. Samples: 952704. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:42,732][00821] Avg episode reward: [(0, '23.192')] +[2024-10-02 03:47:47,701][04739] Updated weights for policy 0, policy_version 930 (0.0540) +[2024-10-02 03:47:47,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3809280. Throughput: 0: 205.5. Samples: 953922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:47,734][00821] Avg episode reward: [(0, '23.486')] +[2024-10-02 03:47:52,730][00821] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3813376. Throughput: 0: 215.8. Samples: 955338. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:47:52,738][00821] Avg episode reward: [(0, '23.013')] +[2024-10-02 03:47:57,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3817472. Throughput: 0: 212.5. Samples: 955930. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:47:57,734][00821] Avg episode reward: [(0, '22.520')] +[2024-10-02 03:48:02,729][00821] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3821568. Throughput: 0: 204.0. Samples: 956936. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:48:02,740][00821] Avg episode reward: [(0, '22.760')] +[2024-10-02 03:48:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3825664. Throughput: 0: 211.5. Samples: 958272. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:48:07,737][00821] Avg episode reward: [(0, '22.722')] +[2024-10-02 03:48:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3829760. Throughput: 0: 214.2. Samples: 958952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:48:12,732][00821] Avg episode reward: [(0, '22.486')] +[2024-10-02 03:48:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3833856. Throughput: 0: 205.2. Samples: 959996. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:17,735][00821] Avg episode reward: [(0, '22.885')] +[2024-10-02 03:48:22,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 3837952. Throughput: 0: 202.7. Samples: 961132. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:22,732][00821] Avg episode reward: [(0, '22.621')] +[2024-10-02 03:48:25,933][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000938_3842048.pth... +[2024-10-02 03:48:26,048][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000889_3641344.pth +[2024-10-02 03:48:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3842048. Throughput: 0: 209.4. Samples: 962128. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:27,731][00821] Avg episode reward: [(0, '22.215')] +[2024-10-02 03:48:32,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3846144. Throughput: 0: 210.3. Samples: 963386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:32,738][00821] Avg episode reward: [(0, '22.478')] +[2024-10-02 03:48:36,434][04739] Updated weights for policy 0, policy_version 940 (0.0548) +[2024-10-02 03:48:37,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3850240. Throughput: 0: 199.4. Samples: 964312. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:48:37,742][00821] Avg episode reward: [(0, '22.193')] +[2024-10-02 03:48:42,729][00821] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 3854336. Throughput: 0: 204.8. Samples: 965148. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-10-02 03:48:42,731][00821] Avg episode reward: [(0, '21.149')] +[2024-10-02 03:48:47,731][00821] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3858432. Throughput: 0: 211.3. Samples: 966446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:47,736][00821] Avg episode reward: [(0, '22.437')] +[2024-10-02 03:48:52,735][00821] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 833.1). Total num frames: 3862528. Throughput: 0: 209.0. Samples: 967676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:52,738][00821] Avg episode reward: [(0, '22.304')] +[2024-10-02 03:48:57,729][00821] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3866624. Throughput: 0: 205.4. Samples: 968196. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:48:57,731][00821] Avg episode reward: [(0, '22.508')] +[2024-10-02 03:49:02,729][00821] Fps is (10 sec: 819.7, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3870720. Throughput: 0: 213.0. Samples: 969580. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:49:02,734][00821] Avg episode reward: [(0, '22.976')] +[2024-10-02 03:49:07,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3874816. Throughput: 0: 216.7. Samples: 970882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:49:07,736][00821] Avg episode reward: [(0, '22.976')] +[2024-10-02 03:49:12,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3878912. Throughput: 0: 203.3. Samples: 971276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:49:12,732][00821] Avg episode reward: [(0, '23.251')] +[2024-10-02 03:49:17,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3883008. Throughput: 0: 198.3. Samples: 972308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:49:17,736][00821] Avg episode reward: [(0, '24.266')] +[2024-10-02 03:49:22,730][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3887104. Throughput: 0: 221.0. Samples: 974256. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-10-02 03:49:22,735][00821] Avg episode reward: [(0, '24.622')] +[2024-10-02 03:49:24,547][04739] Updated weights for policy 0, policy_version 950 (0.2522) +[2024-10-02 03:49:27,729][00821] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 3891200. Throughput: 0: 207.2. Samples: 974470. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-10-02 03:49:27,732][00821] Avg episode reward: [(0, '24.932')] +[2024-10-02 03:49:28,654][00821] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 821], exiting... +[2024-10-02 03:49:28,660][04725] Stopping Batcher_0... +[2024-10-02 03:49:28,662][04725] Loop batcher_evt_loop terminating... +[2024-10-02 03:49:28,659][00821] Runner profile tree view: +main_loop: 4743.4015 +[2024-10-02 03:49:28,665][00821] Collected {0: 3891200}, FPS: 820.3 +[2024-10-02 03:49:29,207][04739] Weights refcount: 2 0 +[2024-10-02 03:49:29,210][04739] Stopping InferenceWorker_p0-w0... +[2024-10-02 03:49:29,210][04739] Loop inference_proc0-0_evt_loop terminating... +[2024-10-02 03:49:29,313][04746] Stopping RolloutWorker_w7... +[2024-10-02 03:49:29,332][04746] Loop rollout_proc7_evt_loop terminating... +[2024-10-02 03:49:29,327][04743] Stopping RolloutWorker_w3... +[2024-10-02 03:49:29,335][04743] Loop rollout_proc3_evt_loop terminating... +[2024-10-02 03:49:29,319][04744] Stopping RolloutWorker_w5... +[2024-10-02 03:49:29,346][04744] Loop rollout_proc5_evt_loop terminating... +[2024-10-02 03:49:29,344][04740] Stopping RolloutWorker_w1... +[2024-10-02 03:49:29,361][04740] Loop rollout_proc1_evt_loop terminating... +[2024-10-02 03:49:29,661][04745] Stopping RolloutWorker_w6... +[2024-10-02 03:49:29,744][04745] Loop rollout_proc6_evt_loop terminating... +[2024-10-02 03:49:29,806][04742] Stopping RolloutWorker_w4... +[2024-10-02 03:49:29,883][04742] Loop rollout_proc4_evt_loop terminating... +[2024-10-02 03:49:29,818][04738] Stopping RolloutWorker_w0... +[2024-10-02 03:49:29,937][04738] Loop rollout_proc0_evt_loop terminating... +[2024-10-02 03:49:30,128][04741] EvtLoop [rollout_proc2_evt_loop, process=rollout_proc2] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance2'), args=(0, 0) +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts + complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts + new_obs, rewards, terminated, truncated, infos = e.step(actions) + File "/usr/local/lib/python3.10/dist-packages/gym/core.py", line 319, in step + return self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step + obs, rew, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step + obs, rew, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step + observation, reward, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/gym/core.py", line 384, in step + observation, reward, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 88, in step + obs, reward, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/gym/core.py", line 319, in step + return self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step + obs, reward, terminated, truncated, info = self.env.step(action) + File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step + reward = self.game.make_action(actions_flattened, self.skip_frames) +vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed. +[2024-10-02 03:49:30,332][04741] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc2_evt_loop +[2024-10-02 03:49:31,662][04725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000951_3895296.pth... +[2024-10-02 03:49:31,852][04725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000914_3743744.pth +[2024-10-02 03:49:31,872][04725] Stopping LearnerWorker_p0... +[2024-10-02 03:49:31,875][04725] Loop learner_proc0_evt_loop terminating... +[2024-10-02 03:49:35,377][00821] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-02 03:49:35,381][00821] Overriding arg 'num_workers' with value 1 passed from command line +[2024-10-02 03:49:35,385][00821] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-10-02 03:49:35,388][00821] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-10-02 03:49:35,390][00821] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-10-02 03:49:35,394][00821] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-10-02 03:49:35,396][00821] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-10-02 03:49:35,398][00821] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-10-02 03:49:35,400][00821] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-10-02 03:49:35,401][00821] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-10-02 03:49:35,402][00821] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-10-02 03:49:35,404][00821] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-10-02 03:49:35,405][00821] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-10-02 03:49:35,406][00821] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-10-02 03:49:35,408][00821] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-10-02 03:49:35,445][00821] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-02 03:49:35,449][00821] RunningMeanStd input shape: (3, 72, 128) +[2024-10-02 03:49:35,456][00821] RunningMeanStd input shape: (1,) +[2024-10-02 03:49:35,498][00821] ConvEncoder: input_channels=3 +[2024-10-02 03:49:35,669][00821] Conv encoder output size: 512 +[2024-10-02 03:49:35,671][00821] Policy head output size: 512 +[2024-10-02 03:49:35,698][00821] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000951_3895296.pth... +[2024-10-02 03:49:36,574][00821] Num frames 100... +[2024-10-02 03:49:36,790][00821] Num frames 200... +[2024-10-02 03:49:37,000][00821] Num frames 300... +[2024-10-02 03:49:37,234][00821] Num frames 400... +[2024-10-02 03:49:37,454][00821] Num frames 500... +[2024-10-02 03:49:37,681][00821] Num frames 600... +[2024-10-02 03:49:37,899][00821] Num frames 700... +[2024-10-02 03:49:38,137][00821] Num frames 800... +[2024-10-02 03:49:38,347][00821] Num frames 900... +[2024-10-02 03:49:38,474][00821] Avg episode rewards: #0: 18.290, true rewards: #0: 9.290 +[2024-10-02 03:49:38,476][00821] Avg episode reward: 18.290, avg true_objective: 9.290 +[2024-10-02 03:49:38,632][00821] Num frames 1000... +[2024-10-02 03:49:38,854][00821] Num frames 1100... +[2024-10-02 03:49:39,081][00821] Num frames 1200... +[2024-10-02 03:49:39,357][00821] Avg episode rewards: #0: 12.465, true rewards: #0: 6.465 +[2024-10-02 03:49:39,359][00821] Avg episode reward: 12.465, avg true_objective: 6.465 +[2024-10-02 03:49:39,377][00821] Num frames 1300... +[2024-10-02 03:49:39,594][00821] Num frames 1400... +[2024-10-02 03:49:39,812][00821] Num frames 1500... +[2024-10-02 03:49:40,046][00821] Num frames 1600... +[2024-10-02 03:49:40,271][00821] Num frames 1700... +[2024-10-02 03:49:40,493][00821] Num frames 1800... +[2024-10-02 03:49:40,715][00821] Num frames 1900... +[2024-10-02 03:49:40,938][00821] Num frames 2000... +[2024-10-02 03:49:41,161][00821] Num frames 2100... +[2024-10-02 03:49:41,385][00821] Num frames 2200... +[2024-10-02 03:49:41,598][00821] Num frames 2300... +[2024-10-02 03:49:41,808][00821] Num frames 2400... +[2024-10-02 03:49:42,040][00821] Num frames 2500... +[2024-10-02 03:49:42,180][00821] Avg episode rewards: #0: 18.793, true rewards: #0: 8.460 +[2024-10-02 03:49:42,182][00821] Avg episode reward: 18.793, avg true_objective: 8.460 +[2024-10-02 03:49:42,324][00821] Num frames 2600... +[2024-10-02 03:49:42,547][00821] Num frames 2700... +[2024-10-02 03:49:42,768][00821] Num frames 2800... +[2024-10-02 03:49:43,086][00821] Num frames 2900... +[2024-10-02 03:49:43,396][00821] Num frames 3000... +[2024-10-02 03:49:43,681][00821] Num frames 3100... +[2024-10-02 03:49:43,972][00821] Num frames 3200... +[2024-10-02 03:49:44,271][00821] Num frames 3300... +[2024-10-02 03:49:44,585][00821] Num frames 3400... +[2024-10-02 03:49:44,894][00821] Num frames 3500... +[2024-10-02 03:49:45,205][00821] Num frames 3600... +[2024-10-02 03:49:45,299][00821] Avg episode rewards: #0: 21.280, true rewards: #0: 9.030 +[2024-10-02 03:49:45,301][00821] Avg episode reward: 21.280, avg true_objective: 9.030 +[2024-10-02 03:49:45,577][00821] Num frames 3700... +[2024-10-02 03:49:45,884][00821] Num frames 3800... +[2024-10-02 03:49:46,117][00821] Num frames 3900... +[2024-10-02 03:49:46,335][00821] Num frames 4000... +[2024-10-02 03:49:46,559][00821] Num frames 4100... +[2024-10-02 03:49:46,786][00821] Num frames 4200... +[2024-10-02 03:49:47,012][00821] Num frames 4300... +[2024-10-02 03:49:47,240][00821] Num frames 4400... +[2024-10-02 03:49:47,463][00821] Num frames 4500... +[2024-10-02 03:49:47,701][00821] Num frames 4600... +[2024-10-02 03:49:47,920][00821] Num frames 4700... +[2024-10-02 03:49:48,140][00821] Num frames 4800... +[2024-10-02 03:49:48,369][00821] Num frames 4900... +[2024-10-02 03:49:48,600][00821] Num frames 5000... +[2024-10-02 03:49:48,736][00821] Avg episode rewards: #0: 24.670, true rewards: #0: 10.070 +[2024-10-02 03:49:48,740][00821] Avg episode reward: 24.670, avg true_objective: 10.070 +[2024-10-02 03:49:48,885][00821] Num frames 5100... +[2024-10-02 03:49:49,122][00821] Num frames 5200... +[2024-10-02 03:49:49,339][00821] Num frames 5300... +[2024-10-02 03:49:49,577][00821] Num frames 5400... +[2024-10-02 03:49:49,827][00821] Num frames 5500... +[2024-10-02 03:49:50,058][00821] Num frames 5600... +[2024-10-02 03:49:50,285][00821] Num frames 5700... +[2024-10-02 03:49:50,514][00821] Num frames 5800... +[2024-10-02 03:49:50,819][00821] Avg episode rewards: #0: 23.832, true rewards: #0: 9.832 +[2024-10-02 03:49:50,824][00821] Avg episode reward: 23.832, avg true_objective: 9.832 +[2024-10-02 03:49:50,829][00821] Num frames 5900... +[2024-10-02 03:49:51,070][00821] Num frames 6000... +[2024-10-02 03:49:51,307][00821] Num frames 6100... +[2024-10-02 03:49:51,530][00821] Num frames 6200... +[2024-10-02 03:49:51,640][00821] Avg episode rewards: #0: 21.320, true rewards: #0: 8.891 +[2024-10-02 03:49:51,642][00821] Avg episode reward: 21.320, avg true_objective: 8.891 +[2024-10-02 03:49:51,806][00821] Num frames 6300... +[2024-10-02 03:49:52,025][00821] Num frames 6400... +[2024-10-02 03:49:52,258][00821] Num frames 6500... +[2024-10-02 03:49:52,480][00821] Num frames 6600... +[2024-10-02 03:49:52,704][00821] Num frames 6700... +[2024-10-02 03:49:52,915][00821] Num frames 6800... +[2024-10-02 03:49:53,132][00821] Num frames 6900... +[2024-10-02 03:49:53,349][00821] Num frames 7000... +[2024-10-02 03:49:53,565][00821] Num frames 7100... +[2024-10-02 03:49:53,665][00821] Avg episode rewards: #0: 21.150, true rewards: #0: 8.900 +[2024-10-02 03:49:53,667][00821] Avg episode reward: 21.150, avg true_objective: 8.900 +[2024-10-02 03:49:53,854][00821] Num frames 7200... +[2024-10-02 03:49:54,076][00821] Num frames 7300... +[2024-10-02 03:49:54,290][00821] Num frames 7400... +[2024-10-02 03:49:54,504][00821] Num frames 7500... +[2024-10-02 03:49:54,724][00821] Num frames 7600... +[2024-10-02 03:49:54,940][00821] Num frames 7700... +[2024-10-02 03:49:55,165][00821] Num frames 7800... +[2024-10-02 03:49:55,385][00821] Num frames 7900... +[2024-10-02 03:49:55,597][00821] Num frames 8000... +[2024-10-02 03:49:55,827][00821] Num frames 8100... +[2024-10-02 03:49:56,105][00821] Num frames 8200... +[2024-10-02 03:49:56,398][00821] Num frames 8300... +[2024-10-02 03:49:56,702][00821] Num frames 8400... +[2024-10-02 03:49:57,020][00821] Num frames 8500... +[2024-10-02 03:49:57,247][00821] Avg episode rewards: #0: 22.286, true rewards: #0: 9.508 +[2024-10-02 03:49:57,253][00821] Avg episode reward: 22.286, avg true_objective: 9.508 +[2024-10-02 03:49:57,379][00821] Num frames 8600... +[2024-10-02 03:49:57,691][00821] Num frames 8700... +[2024-10-02 03:49:58,027][00821] Num frames 8800... +[2024-10-02 03:49:58,338][00821] Num frames 8900... +[2024-10-02 03:49:58,649][00821] Num frames 9000... +[2024-10-02 03:49:58,953][00821] Num frames 9100... +[2024-10-02 03:49:59,238][00821] Num frames 9200... +[2024-10-02 03:49:59,462][00821] Num frames 9300... +[2024-10-02 03:49:59,539][00821] Avg episode rewards: #0: 21.808, true rewards: #0: 9.308 +[2024-10-02 03:49:59,542][00821] Avg episode reward: 21.808, avg true_objective: 9.308 +[2024-10-02 03:51:10,677][00821] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-10-02 03:52:07,518][00821] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-02 03:52:07,524][00821] Overriding arg 'num_workers' with value 1 passed from command line +[2024-10-02 03:52:07,526][00821] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-10-02 03:52:07,529][00821] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-10-02 03:52:07,534][00821] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-10-02 03:52:07,535][00821] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-10-02 03:52:07,537][00821] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-10-02 03:52:07,539][00821] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-10-02 03:52:07,541][00821] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-10-02 03:52:07,543][00821] Adding new argument 'hf_repository'='ValentinGuigon/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-10-02 03:52:07,544][00821] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-10-02 03:52:07,546][00821] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-10-02 03:52:07,547][00821] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-10-02 03:52:07,549][00821] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-10-02 03:52:07,550][00821] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-10-02 03:52:07,571][00821] RunningMeanStd input shape: (3, 72, 128) +[2024-10-02 03:52:07,574][00821] RunningMeanStd input shape: (1,) +[2024-10-02 03:52:07,605][00821] ConvEncoder: input_channels=3 +[2024-10-02 03:52:07,689][00821] Conv encoder output size: 512 +[2024-10-02 03:52:07,692][00821] Policy head output size: 512 +[2024-10-02 03:52:07,727][00821] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000951_3895296.pth... +[2024-10-02 03:52:08,609][00821] Num frames 100... +[2024-10-02 03:52:08,851][00821] Num frames 200... +[2024-10-02 03:52:09,066][00821] Num frames 300... +[2024-10-02 03:52:09,286][00821] Num frames 400... +[2024-10-02 03:52:09,504][00821] Num frames 500... +[2024-10-02 03:52:09,736][00821] Num frames 600... +[2024-10-02 03:52:09,976][00821] Num frames 700... +[2024-10-02 03:52:10,228][00821] Num frames 800... +[2024-10-02 03:52:10,458][00821] Num frames 900... +[2024-10-02 03:52:10,711][00821] Num frames 1000... +[2024-10-02 03:52:10,932][00821] Num frames 1100... +[2024-10-02 03:52:11,170][00821] Num frames 1200... +[2024-10-02 03:52:11,388][00821] Num frames 1300... +[2024-10-02 03:52:11,611][00821] Num frames 1400... +[2024-10-02 03:52:11,676][00821] Avg episode rewards: #0: 38.010, true rewards: #0: 14.010 +[2024-10-02 03:52:11,678][00821] Avg episode reward: 38.010, avg true_objective: 14.010 +[2024-10-02 03:52:11,891][00821] Num frames 1500... +[2024-10-02 03:52:12,117][00821] Num frames 1600... +[2024-10-02 03:52:12,340][00821] Num frames 1700... +[2024-10-02 03:52:12,561][00821] Num frames 1800... +[2024-10-02 03:52:12,786][00821] Num frames 1900... +[2024-10-02 03:52:13,005][00821] Num frames 2000... +[2024-10-02 03:52:13,228][00821] Num frames 2100... +[2024-10-02 03:52:13,453][00821] Num frames 2200... +[2024-10-02 03:52:13,681][00821] Num frames 2300... +[2024-10-02 03:52:13,920][00821] Num frames 2400... +[2024-10-02 03:52:14,150][00821] Num frames 2500... +[2024-10-02 03:52:14,391][00821] Num frames 2600... +[2024-10-02 03:52:14,622][00821] Num frames 2700... +[2024-10-02 03:52:14,857][00821] Num frames 2800... +[2024-10-02 03:52:15,086][00821] Num frames 2900... +[2024-10-02 03:52:15,319][00821] Num frames 3000... +[2024-10-02 03:52:15,564][00821] Num frames 3100... +[2024-10-02 03:52:15,796][00821] Num frames 3200... +[2024-10-02 03:52:16,033][00821] Num frames 3300... +[2024-10-02 03:52:16,269][00821] Num frames 3400... +[2024-10-02 03:52:16,503][00821] Num frames 3500... +[2024-10-02 03:52:16,569][00821] Avg episode rewards: #0: 47.505, true rewards: #0: 17.505 +[2024-10-02 03:52:16,571][00821] Avg episode reward: 47.505, avg true_objective: 17.505 +[2024-10-02 03:52:16,812][00821] Num frames 3600... +[2024-10-02 03:52:17,048][00821] Num frames 3700... +[2024-10-02 03:52:17,285][00821] Num frames 3800... +[2024-10-02 03:52:17,540][00821] Num frames 3900... +[2024-10-02 03:52:17,786][00821] Num frames 4000... +[2024-10-02 03:52:18,044][00821] Num frames 4100... +[2024-10-02 03:52:18,296][00821] Num frames 4200... +[2024-10-02 03:52:18,572][00821] Num frames 4300... +[2024-10-02 03:52:18,880][00821] Num frames 4400... +[2024-10-02 03:52:19,199][00821] Num frames 4500... +[2024-10-02 03:52:19,656][00821] Num frames 4600... +[2024-10-02 03:52:19,988][00821] Num frames 4700... +[2024-10-02 03:52:20,444][00821] Num frames 4800... +[2024-10-02 03:52:20,779][00821] Num frames 4900... +[2024-10-02 03:52:21,144][00821] Num frames 5000... +[2024-10-02 03:52:21,495][00821] Num frames 5100... +[2024-10-02 03:52:21,855][00821] Num frames 5200... +[2024-10-02 03:52:22,234][00821] Num frames 5300... +[2024-10-02 03:52:22,493][00821] Num frames 5400... +[2024-10-02 03:52:22,755][00821] Num frames 5500... +[2024-10-02 03:52:22,989][00821] Num frames 5600... +[2024-10-02 03:52:23,056][00821] Avg episode rewards: #0: 53.003, true rewards: #0: 18.670 +[2024-10-02 03:52:23,059][00821] Avg episode reward: 53.003, avg true_objective: 18.670 +[2024-10-02 03:52:23,280][00821] Num frames 5700... +[2024-10-02 03:52:23,507][00821] Num frames 5800... +[2024-10-02 03:52:23,748][00821] Num frames 5900... +[2024-10-02 03:52:23,971][00821] Num frames 6000... +[2024-10-02 03:52:24,189][00821] Num frames 6100... +[2024-10-02 03:52:24,416][00821] Num frames 6200... +[2024-10-02 03:52:24,635][00821] Num frames 6300... +[2024-10-02 03:52:24,857][00821] Num frames 6400... +[2024-10-02 03:52:25,089][00821] Num frames 6500... +[2024-10-02 03:52:25,325][00821] Num frames 6600... +[2024-10-02 03:52:25,556][00821] Num frames 6700... +[2024-10-02 03:52:25,782][00821] Num frames 6800... +[2024-10-02 03:52:26,006][00821] Num frames 6900... +[2024-10-02 03:52:26,247][00821] Num frames 7000... +[2024-10-02 03:52:26,481][00821] Num frames 7100... +[2024-10-02 03:52:26,630][00821] Avg episode rewards: #0: 49.592, true rewards: #0: 17.843 +[2024-10-02 03:52:26,632][00821] Avg episode reward: 49.592, avg true_objective: 17.843 +[2024-10-02 03:52:26,768][00821] Num frames 7200... +[2024-10-02 03:52:26,990][00821] Num frames 7300... +[2024-10-02 03:52:27,215][00821] Num frames 7400... +[2024-10-02 03:52:27,441][00821] Num frames 7500... +[2024-10-02 03:52:27,656][00821] Num frames 7600... +[2024-10-02 03:52:27,872][00821] Num frames 7700... +[2024-10-02 03:52:28,089][00821] Num frames 7800... +[2024-10-02 03:52:28,311][00821] Num frames 7900... +[2024-10-02 03:52:28,388][00821] Avg episode rewards: #0: 43.210, true rewards: #0: 15.810 +[2024-10-02 03:52:28,390][00821] Avg episode reward: 43.210, avg true_objective: 15.810 +[2024-10-02 03:52:28,612][00821] Num frames 8000... +[2024-10-02 03:52:28,841][00821] Num frames 8100... +[2024-10-02 03:52:29,070][00821] Num frames 8200... +[2024-10-02 03:52:29,287][00821] Num frames 8300... +[2024-10-02 03:52:29,524][00821] Num frames 8400... +[2024-10-02 03:52:29,761][00821] Num frames 8500... +[2024-10-02 03:52:29,857][00821] Avg episode rewards: #0: 38.355, true rewards: #0: 14.188 +[2024-10-02 03:52:29,859][00821] Avg episode reward: 38.355, avg true_objective: 14.188 +[2024-10-02 03:52:30,091][00821] Num frames 8600... +[2024-10-02 03:52:30,348][00821] Num frames 8700... +[2024-10-02 03:52:30,598][00821] Num frames 8800... +[2024-10-02 03:52:30,843][00821] Num frames 8900... +[2024-10-02 03:52:30,998][00821] Avg episode rewards: #0: 33.915, true rewards: #0: 12.773 +[2024-10-02 03:52:31,002][00821] Avg episode reward: 33.915, avg true_objective: 12.773 +[2024-10-02 03:52:31,138][00821] Num frames 9000... +[2024-10-02 03:52:31,370][00821] Num frames 9100... +[2024-10-02 03:52:31,620][00821] Num frames 9200... +[2024-10-02 03:52:31,859][00821] Num frames 9300... +[2024-10-02 03:52:32,086][00821] Num frames 9400... +[2024-10-02 03:52:32,366][00821] Num frames 9500... +[2024-10-02 03:52:32,686][00821] Num frames 9600... +[2024-10-02 03:52:32,773][00821] Avg episode rewards: #0: 31.257, true rewards: #0: 12.008 +[2024-10-02 03:52:32,776][00821] Avg episode reward: 31.257, avg true_objective: 12.008 +[2024-10-02 03:52:33,057][00821] Num frames 9700... +[2024-10-02 03:52:33,344][00821] Num frames 9800... +[2024-10-02 03:52:33,669][00821] Num frames 9900... +[2024-10-02 03:52:33,998][00821] Num frames 10000... +[2024-10-02 03:52:34,301][00821] Num frames 10100... +[2024-10-02 03:52:34,643][00821] Num frames 10200... +[2024-10-02 03:52:34,967][00821] Num frames 10300... +[2024-10-02 03:52:35,276][00821] Num frames 10400... +[2024-10-02 03:52:35,536][00821] Num frames 10500... +[2024-10-02 03:52:35,778][00821] Num frames 10600... +[2024-10-02 03:52:36,026][00821] Num frames 10700... +[2024-10-02 03:52:36,255][00821] Num frames 10800... +[2024-10-02 03:52:36,530][00821] Num frames 10900... +[2024-10-02 03:52:36,798][00821] Avg episode rewards: #0: 31.535, true rewards: #0: 12.202 +[2024-10-02 03:52:36,800][00821] Avg episode reward: 31.535, avg true_objective: 12.202 +[2024-10-02 03:52:36,846][00821] Num frames 11000... +[2024-10-02 03:52:37,093][00821] Num frames 11100... +[2024-10-02 03:52:37,334][00821] Num frames 11200... +[2024-10-02 03:52:37,579][00821] Num frames 11300... +[2024-10-02 03:52:37,836][00821] Num frames 11400... +[2024-10-02 03:52:38,075][00821] Num frames 11500... +[2024-10-02 03:52:38,309][00821] Num frames 11600... +[2024-10-02 03:52:38,548][00821] Num frames 11700... +[2024-10-02 03:52:38,803][00821] Num frames 11800... +[2024-10-02 03:52:39,296][00821] Num frames 11900... +[2024-10-02 03:52:39,903][00821] Num frames 12000... +[2024-10-02 03:52:40,480][00821] Num frames 12100... +[2024-10-02 03:52:40,709][00821] Avg episode rewards: #0: 30.950, true rewards: #0: 12.150 +[2024-10-02 03:52:40,714][00821] Avg episode reward: 30.950, avg true_objective: 12.150 +[2024-10-02 03:54:16,644][00821] Replay video saved to /content/train_dir/default_experiment/replay.mp4!