diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1986,3 +1986,1459 @@ main_loop: 103.9782 [2023-06-21 01:16:14,304][62782] Avg episode rewards: #0: 12.412, true rewards: #0: 6.912 [2023-06-21 01:16:14,307][62782] Avg episode reward: 12.412, avg true_objective: 6.912 [2023-06-21 01:16:23,345][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4! +[2023-06-21 01:16:44,521][62782] The model has been pushed to https://huggingface.co/mihirdeo16/vizdoom_health_gathering_supreme +[2023-06-21 10:43:01,450][62782] Environment doom_basic already registered, overwriting... +[2023-06-21 10:43:01,453][62782] Environment doom_two_colors_easy already registered, overwriting... +[2023-06-21 10:43:01,454][62782] Environment doom_two_colors_hard already registered, overwriting... +[2023-06-21 10:43:01,454][62782] Environment doom_dm already registered, overwriting... +[2023-06-21 10:43:01,454][62782] Environment doom_dwango5 already registered, overwriting... +[2023-06-21 10:43:01,455][62782] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-06-21 10:43:01,455][62782] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-06-21 10:43:01,455][62782] Environment doom_my_way_home already registered, overwriting... +[2023-06-21 10:43:01,456][62782] Environment doom_deadly_corridor already registered, overwriting... +[2023-06-21 10:43:01,456][62782] Environment doom_defend_the_center already registered, overwriting... +[2023-06-21 10:43:01,456][62782] Environment doom_defend_the_line already registered, overwriting... +[2023-06-21 10:43:01,457][62782] Environment doom_health_gathering already registered, overwriting... +[2023-06-21 10:43:01,457][62782] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-06-21 10:43:01,457][62782] Environment doom_battle already registered, overwriting... +[2023-06-21 10:43:01,458][62782] Environment doom_battle2 already registered, overwriting... +[2023-06-21 10:43:01,458][62782] Environment doom_duel_bots already registered, overwriting... +[2023-06-21 10:43:01,459][62782] Environment doom_deathmatch_bots already registered, overwriting... +[2023-06-21 10:43:01,459][62782] Environment doom_duel already registered, overwriting... +[2023-06-21 10:43:01,459][62782] Environment doom_deathmatch_full already registered, overwriting... +[2023-06-21 10:43:01,460][62782] Environment doom_benchmark already registered, overwriting... +[2023-06-21 10:43:01,460][62782] register_encoder_factory: +[2023-06-21 10:43:01,522][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-21 10:43:01,523][62782] Overriding arg 'train_for_env_steps' with value 4000000 passed from command line +[2023-06-21 10:43:01,535][62782] Experiment dir /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment already exists! +[2023-06-21 10:43:01,536][62782] Resuming existing experiment from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment... 
+[2023-06-21 10:43:01,536][62782] Weights and Biases integration disabled
+[2023-06-21 10:43:01,542][62782] Environment var CUDA_VISIBLE_DEVICES is
+[2023-06-21 10:43:02,923][62782] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/Users/md/Code/python/jubilant-memory/RL/train_dir
+restart_behavior=resume
+device=cpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=8
+num_envs_per_worker=4
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=4000000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+git_hash=02ef46648112e6fd3adc4475dfd889e784c0ef87
+git_repo_name=https://github.com/mihirdeo16/jubilant-memory.git
+[2023-06-21 10:43:02,925][62782] Saving configuration to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json...
+[2023-06-21 10:43:02,950][62782] Rollout worker 0 uses device cpu
+[2023-06-21 10:43:02,951][62782] Rollout worker 1 uses device cpu
+[2023-06-21 10:43:02,951][62782] Rollout worker 2 uses device cpu
+[2023-06-21 10:43:02,951][62782] Rollout worker 3 uses device cpu
+[2023-06-21 10:43:02,952][62782] Rollout worker 4 uses device cpu
+[2023-06-21 10:43:02,952][62782] Rollout worker 5 uses device cpu
+[2023-06-21 10:43:02,952][62782] Rollout worker 6 uses device cpu
+[2023-06-21 10:43:02,953][62782] Rollout worker 7 uses device cpu
+[2023-06-21 10:43:03,010][62782] InferenceWorker_p0-w0: min num requests: 2
+[2023-06-21 10:43:03,047][62782] Starting all processes...
+[2023-06-21 10:43:03,048][62782] Starting process learner_proc0
+[2023-06-21 10:43:03,103][62782] Starting all processes...
+[2023-06-21 10:43:03,116][62782] Starting process inference_proc0-0
+[2023-06-21 10:43:03,119][62782] Starting process rollout_proc0
+[2023-06-21 10:43:03,119][62782] Starting process rollout_proc1
+[2023-06-21 10:43:03,120][62782] Starting process rollout_proc2
+[2023-06-21 10:43:03,120][62782] Starting process rollout_proc3
+[2023-06-21 10:43:03,120][62782] Starting process rollout_proc4
+[2023-06-21 10:43:03,121][62782] Starting process rollout_proc5
+[2023-06-21 10:43:03,121][62782] Starting process rollout_proc6
+[2023-06-21 10:43:03,121][62782] Starting process rollout_proc7
+[2023-06-21 10:43:05,405][69877] On MacOS, not setting affinity
+[2023-06-21 10:43:05,406][69876] Starting seed is not provided
+[2023-06-21 10:43:05,407][69876] Initializing actor-critic model on device cpu
+[2023-06-21 10:43:05,407][69876] RunningMeanStd input shape: (3, 72, 128)
+[2023-06-21 10:43:05,410][69876] RunningMeanStd input shape: (1,)
+[2023-06-21 10:43:05,421][69876] ConvEncoder: input_channels=3
+[2023-06-21 10:43:05,428][69883] On MacOS, not setting affinity
+[2023-06-21 10:43:05,463][69880] On MacOS, not setting affinity
+[2023-06-21 10:43:05,479][69879] On MacOS, not setting affinity
+[2023-06-21 10:43:05,550][69876] Conv encoder output size: 512
+[2023-06-21 10:43:05,553][69876] Policy head output size: 512
+[2023-06-21 10:43:05,559][69881] On MacOS, not setting affinity
+[2023-06-21 10:43:05,559][69885] On MacOS, not setting affinity
+[2023-06-21 10:43:05,559][69882] On MacOS, not setting affinity
+[2023-06-21 10:43:05,566][69884] On MacOS, not setting affinity
+[2023-06-21 10:43:05,571][69876] Created Actor Critic model with architecture:
+[2023-06-21 10:43:05,572][69876] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-06-21 10:43:05,575][69876] Using optimizer
+[2023-06-21 10:43:05,576][69876] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth...
+[2023-06-21 10:43:05,621][69876] Loading model from checkpoint
+[2023-06-21 10:43:05,631][69876] Loaded experiment state at self.train_step=490, self.env_steps=2007040
+[2023-06-21 10:43:05,638][69876] Initialized policy 0 weights for model version 490
+[2023-06-21 10:43:05,645][69876] LearnerWorker_p0 finished initialization!
+[2023-06-21 10:43:05,651][69878] RunningMeanStd input shape: (3, 72, 128)
+[2023-06-21 10:43:05,653][69878] RunningMeanStd input shape: (1,)
+[2023-06-21 10:43:05,668][69878] ConvEncoder: input_channels=3
+[2023-06-21 10:43:05,714][69878] Conv encoder output size: 512
+[2023-06-21 10:43:05,714][69878] Policy head output size: 512
+[2023-06-21 10:43:05,721][62782] Inference worker 0-0 is ready!
+[2023-06-21 10:43:05,722][62782] All inference workers are ready! Signal rollout workers to start!
+[2023-06-21 10:43:05,758][69879] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,761][69882] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,761][69881] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,762][69885] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,763][69877] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,764][69884] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,764][69880] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:05,765][69883] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-06-21 10:43:06,547][62782] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 2007040. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-06-21 10:43:07,413][69885] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:07,413][69877] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:07,413][69880] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:07,438][69884] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:07,438][69881] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:07,439][69879] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:08,448][69880] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,448][69877] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,449][69884] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,449][69885] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,455][69883] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:08,456][69881] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,460][69879] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:08,461][69882] Decorrelating experience for 0 frames...
+[2023-06-21 10:43:09,150][69883] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:09,151][69882] Decorrelating experience for 32 frames...
+[2023-06-21 10:43:10,176][69879] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,182][69881] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,184][69880] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,198][69884] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,634][69883] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,640][69882] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:10,844][69877] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:11,545][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 2007040. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-06-21 10:43:11,552][69885] Decorrelating experience for 64 frames...
+[2023-06-21 10:43:12,482][69881] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:12,482][69884] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:12,483][69880] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:12,922][69882] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:12,923][69883] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:13,083][69877] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:13,165][69879] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:13,776][69885] Decorrelating experience for 96 frames...
+[2023-06-21 10:43:16,547][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 2007040. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-06-21 10:43:16,550][62782] Avg episode reward: [(0, '0.320')]
+[2023-06-21 10:43:21,548][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 2007040. Throughput: 0: 113.1. Samples: 1696. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-06-21 10:43:21,553][62782] Avg episode reward: [(0, '1.473')]
+[2023-06-21 10:43:22,987][62782] Heartbeat connected on Batcher_0
+[2023-06-21 10:43:23,024][62782] Heartbeat connected on InferenceWorker_p0-w0
+[2023-06-21 10:43:23,092][62782] Heartbeat connected on RolloutWorker_w3
+[2023-06-21 10:43:23,097][62782] Heartbeat connected on RolloutWorker_w0
+[2023-06-21 10:43:23,102][62782] Heartbeat connected on RolloutWorker_w5
+[2023-06-21 10:43:23,103][62782] Heartbeat connected on RolloutWorker_w4
+[2023-06-21 10:43:23,110][62782] Heartbeat connected on RolloutWorker_w2
+[2023-06-21 10:43:23,114][62782] Heartbeat connected on RolloutWorker_w1
+[2023-06-21 10:43:23,117][62782] Heartbeat connected on RolloutWorker_w6
+[2023-06-21 10:43:23,126][62782] Heartbeat connected on RolloutWorker_w7
+[2023-06-21 10:43:25,135][62782] Heartbeat connected on LearnerWorker_p0
+[2023-06-21 10:43:26,547][62782] Fps is (10 sec: 819.1, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 2015232. Throughput: 0: 132.1. Samples: 2642. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2023-06-21 10:43:26,549][62782] Avg episode reward: [(0, '3.037')]
+[2023-06-21 10:43:31,548][62782] Fps is (10 sec: 1638.4, 60 sec: 655.3, 300 sec: 655.3). Total num frames: 2023424. Throughput: 0: 181.2. Samples: 4530. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:43:31,551][62782] Avg episode reward: [(0, '5.303')]
+[2023-06-21 10:43:36,546][62782] Fps is (10 sec: 1228.9, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 2027520. Throughput: 0: 213.6. Samples: 6408. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:43:36,554][62782] Avg episode reward: [(0, '6.612')]
+[2023-06-21 10:43:41,549][62782] Fps is (10 sec: 819.2, 60 sec: 702.2, 300 sec: 702.2). Total num frames: 2031616. Throughput: 0: 210.2. Samples: 7356. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:43:41,575][62782] Avg episode reward: [(0, '7.762')]
+[2023-06-21 10:43:46,547][62782] Fps is (10 sec: 1228.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 2039808. Throughput: 0: 231.1. Samples: 9246. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:43:46,551][62782] Avg episode reward: [(0, '7.826')]
+[2023-06-21 10:43:50,776][69878] Updated weights for policy 0, policy_version 500 (0.0021)
+[2023-06-21 10:43:51,547][62782] Fps is (10 sec: 1638.5, 60 sec: 910.2, 300 sec: 910.2). Total num frames: 2048000. Throughput: 0: 247.7. Samples: 11148. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:43:51,548][62782] Avg episode reward: [(0, '8.799')]
+[2023-06-21 10:43:56,547][62782] Fps is (10 sec: 1228.9, 60 sec: 901.1, 300 sec: 901.1). Total num frames: 2052096. Throughput: 0: 268.4. Samples: 12080. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:43:56,554][62782] Avg episode reward: [(0, '9.228')]
+[2023-06-21 10:44:01,545][62782] Fps is (10 sec: 1229.0, 60 sec: 968.2, 300 sec: 968.2). Total num frames: 2060288. Throughput: 0: 310.5. Samples: 13972. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:01,548][62782] Avg episode reward: [(0, '9.521')]
+[2023-06-21 10:44:06,548][62782] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 955.7). Total num frames: 2064384. Throughput: 0: 313.7. Samples: 15814. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:06,551][62782] Avg episode reward: [(0, '9.525')]
+[2023-06-21 10:44:11,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1092.2, 300 sec: 1008.2). Total num frames: 2072576. Throughput: 0: 314.0. Samples: 16772. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:11,550][62782] Avg episode reward: [(0, '9.922')]
+[2023-06-21 10:44:16,545][62782] Fps is (10 sec: 1229.2, 60 sec: 1160.6, 300 sec: 994.8). Total num frames: 2076672. Throughput: 0: 314.1. Samples: 18662. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:16,547][62782] Avg episode reward: [(0, '11.170')]
+[2023-06-21 10:44:16,866][69876] Saving new best policy, reward=11.170!
+[2023-06-21 10:44:21,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1297.1, 300 sec: 1037.7). Total num frames: 2084864. Throughput: 0: 314.7. Samples: 20568. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:21,546][62782] Avg episode reward: [(0, '12.319')]
+[2023-06-21 10:44:23,312][69876] Saving new best policy, reward=12.319!
+[2023-06-21 10:44:23,326][69878] Updated weights for policy 0, policy_version 510 (0.0018)
+[2023-06-21 10:44:26,552][62782] Fps is (10 sec: 1227.9, 60 sec: 1228.7, 300 sec: 1023.9). Total num frames: 2088960. Throughput: 0: 314.7. Samples: 21518. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:26,553][62782] Avg episode reward: [(0, '12.784')]
+[2023-06-21 10:44:26,650][69876] Saving new best policy, reward=12.784!
+[2023-06-21 10:44:31,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1060.2). Total num frames: 2097152. Throughput: 0: 315.0. Samples: 23422. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:31,549][62782] Avg episode reward: [(0, '12.690')]
+[2023-06-21 10:44:36,547][62782] Fps is (10 sec: 1229.4, 60 sec: 1228.8, 300 sec: 1046.8). Total num frames: 2101248. Throughput: 0: 314.5. Samples: 25302. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:36,552][62782] Avg episode reward: [(0, '12.506')]
+[2023-06-21 10:44:41,545][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1077.9). Total num frames: 2109440. Throughput: 0: 314.8. Samples: 26246. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:41,547][62782] Avg episode reward: [(0, '12.316')]
+[2023-06-21 10:44:46,547][62782] Fps is (10 sec: 1638.4, 60 sec: 1297.1, 300 sec: 1105.9). Total num frames: 2117632. Throughput: 0: 313.8. Samples: 28092. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:46,549][62782] Avg episode reward: [(0, '12.021')]
+[2023-06-21 10:44:51,545][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1092.3). Total num frames: 2121728. Throughput: 0: 315.0. Samples: 29990. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:44:51,546][62782] Avg episode reward: [(0, '11.554')]
+[2023-06-21 10:44:56,147][69878] Updated weights for policy 0, policy_version 520 (0.0014)
+[2023-06-21 10:44:56,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1117.1). Total num frames: 2129920. Throughput: 0: 314.8. Samples: 30938. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:44:56,547][62782] Avg episode reward: [(0, '11.596')]
+[2023-06-21 10:45:01,546][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1104.1). Total num frames: 2134016. Throughput: 0: 314.6. Samples: 32820. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:45:01,549][62782] Avg episode reward: [(0, '11.180')]
+[2023-06-21 10:45:02,719][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000522_2138112.pth...
+[2023-06-21 10:45:02,815][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000463_1896448.pth
+[2023-06-21 10:45:06,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1126.4). Total num frames: 2142208. Throughput: 0: 314.4. Samples: 34718. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:06,548][62782] Avg episode reward: [(0, '10.977')]
+[2023-06-21 10:45:11,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1114.1). Total num frames: 2146304. Throughput: 0: 314.5. Samples: 35668. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:11,552][62782] Avg episode reward: [(0, '10.181')]
+[2023-06-21 10:45:16,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1134.3). Total num frames: 2154496. Throughput: 0: 314.4. Samples: 37570. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:16,547][62782] Avg episode reward: [(0, '10.501')]
+[2023-06-21 10:45:21,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1122.6). Total num frames: 2158592. Throughput: 0: 314.6. Samples: 39458. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:45:21,554][62782] Avg episode reward: [(0, '10.572')]
+[2023-06-21 10:45:26,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.2, 300 sec: 1141.0). Total num frames: 2166784. Throughput: 0: 314.2. Samples: 40386. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:26,549][62782] Avg episode reward: [(0, '10.544')]
+[2023-06-21 10:45:28,980][69878] Updated weights for policy 0, policy_version 530 (0.0015)
+[2023-06-21 10:45:31,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1129.9). Total num frames: 2170880. Throughput: 0: 315.2. Samples: 42278. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:31,556][62782] Avg episode reward: [(0, '10.948')]
+[2023-06-21 10:45:36,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1146.9). Total num frames: 2179072. Throughput: 0: 314.8. Samples: 44156. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 10:45:36,551][62782] Avg episode reward: [(0, '11.408')]
+[2023-06-21 10:45:41,552][62782] Fps is (10 sec: 1228.3, 60 sec: 1228.7, 300 sec: 1136.3). Total num frames: 2183168. Throughput: 0: 313.5. Samples: 45046. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:41,564][62782] Avg episode reward: [(0, '11.296')]
+[2023-06-21 10:45:46,545][62782] Fps is (10 sec: 1229.2, 60 sec: 1228.8, 300 sec: 1152.0). Total num frames: 2191360. Throughput: 0: 313.1. Samples: 46910. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:46,547][62782] Avg episode reward: [(0, '11.594')]
+[2023-06-21 10:45:51,547][62782] Fps is (10 sec: 1229.2, 60 sec: 1228.8, 300 sec: 1141.9). Total num frames: 2195456. Throughput: 0: 313.4. Samples: 48820. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:51,550][62782] Avg episode reward: [(0, '11.565')]
+[2023-06-21 10:45:56,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1156.5). Total num frames: 2203648. Throughput: 0: 313.5. Samples: 49776. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:45:56,550][62782] Avg episode reward: [(0, '11.959')]
+[2023-06-21 10:46:01,463][69878] Updated weights for policy 0, policy_version 540 (0.0017)
+[2023-06-21 10:46:01,550][62782] Fps is (10 sec: 1638.1, 60 sec: 1297.0, 300 sec: 1170.3). Total num frames: 2211840. Throughput: 0: 313.5. Samples: 51680. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:01,565][62782] Avg episode reward: [(0, '12.728')]
+[2023-06-21 10:46:06,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1160.5). Total num frames: 2215936. Throughput: 0: 314.0. Samples: 53588. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:06,548][62782] Avg episode reward: [(0, '12.787')]
+[2023-06-21 10:46:07,992][69876] Saving new best policy, reward=12.787!
+[2023-06-21 10:46:11,546][62782] Fps is (10 sec: 1229.1, 60 sec: 1297.1, 300 sec: 1173.5). Total num frames: 2224128. Throughput: 0: 314.4. Samples: 54534. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:11,549][62782] Avg episode reward: [(0, '12.692')]
+[2023-06-21 10:46:16,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1164.1). Total num frames: 2228224. Throughput: 0: 314.7. Samples: 56440. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:16,551][62782] Avg episode reward: [(0, '12.702')]
+[2023-06-21 10:46:21,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1176.3). Total num frames: 2236416. Throughput: 0: 315.2. Samples: 58338. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:21,551][62782] Avg episode reward: [(0, '13.728')]
+[2023-06-21 10:46:21,555][69876] Saving new best policy, reward=13.728!
+[2023-06-21 10:46:26,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1167.4). Total num frames: 2240512. Throughput: 0: 316.6. Samples: 59292. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:26,561][62782] Avg episode reward: [(0, '13.988')]
+[2023-06-21 10:46:27,227][69876] Saving new best policy, reward=13.988!
+[2023-06-21 10:46:31,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1178.8). Total num frames: 2248704. Throughput: 0: 317.5. Samples: 61198. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:31,550][62782] Avg episode reward: [(0, '13.951')]
+[2023-06-21 10:46:33,717][69878] Updated weights for policy 0, policy_version 550 (0.0012)
+[2023-06-21 10:46:36,544][62782] Fps is (10 sec: 1229.2, 60 sec: 1228.9, 300 sec: 1170.3). Total num frames: 2252800. Throughput: 0: 317.2. Samples: 63092. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:36,547][62782] Avg episode reward: [(0, '13.768')]
+[2023-06-21 10:46:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.2, 300 sec: 1181.2). Total num frames: 2260992. Throughput: 0: 317.0. Samples: 64040. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:41,551][62782] Avg episode reward: [(0, '13.769')]
+[2023-06-21 10:46:46,547][62782] Fps is (10 sec: 1228.4, 60 sec: 1228.8, 300 sec: 1172.9). Total num frames: 2265088. Throughput: 0: 317.0. Samples: 65946. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:46,550][62782] Avg episode reward: [(0, '14.008')]
+[2023-06-21 10:46:46,551][69876] Saving new best policy, reward=14.008!
+[2023-06-21 10:46:51,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1183.3). Total num frames: 2273280. Throughput: 0: 316.9. Samples: 67848. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:51,551][62782] Avg episode reward: [(0, '14.787')]
+[2023-06-21 10:46:53,094][69876] Saving new best policy, reward=14.787!
+[2023-06-21 10:46:56,542][62782] Fps is (10 sec: 1639.2, 60 sec: 1297.2, 300 sec: 1193.2). Total num frames: 2281472. Throughput: 0: 316.9. Samples: 68792. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:46:56,545][62782] Avg episode reward: [(0, '15.735')]
+[2023-06-21 10:46:56,546][69876] Saving new best policy, reward=15.735!
+[2023-06-21 10:47:01,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1185.2). Total num frames: 2285568. Throughput: 0: 316.8. Samples: 70694. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:01,551][62782] Avg episode reward: [(0, '16.099')]
+[2023-06-21 10:47:02,884][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000559_2289664.pth...
+[2023-06-21 10:47:02,978][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth
+[2023-06-21 10:47:03,009][69876] Saving new best policy, reward=16.099!
+[2023-06-21 10:47:06,106][69878] Updated weights for policy 0, policy_version 560 (0.0024)
+[2023-06-21 10:47:06,542][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.2, 300 sec: 1194.7). Total num frames: 2293760. Throughput: 0: 316.7. Samples: 72588. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:06,544][62782] Avg episode reward: [(0, '15.229')]
+[2023-06-21 10:47:11,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1187.0). Total num frames: 2297856. Throughput: 0: 316.7. Samples: 73544. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:11,550][62782] Avg episode reward: [(0, '14.760')]
+[2023-06-21 10:47:16,547][62782] Fps is (10 sec: 1228.2, 60 sec: 1297.1, 300 sec: 1196.0). Total num frames: 2306048. Throughput: 0: 316.5. Samples: 75442. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:16,549][62782] Avg episode reward: [(0, '14.537')]
+[2023-06-21 10:47:21,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1188.6). Total num frames: 2310144. Throughput: 0: 317.0. Samples: 77356. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:21,552][62782] Avg episode reward: [(0, '14.045')]
+[2023-06-21 10:47:26,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1197.3). Total num frames: 2318336. Throughput: 0: 317.0. Samples: 78304. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:26,551][62782] Avg episode reward: [(0, '14.430')]
+[2023-06-21 10:47:31,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1190.2). Total num frames: 2322432. Throughput: 0: 316.9. Samples: 80206. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:31,551][62782] Avg episode reward: [(0, '14.034')]
+[2023-06-21 10:47:36,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1297.0, 300 sec: 1198.5). Total num frames: 2330624. Throughput: 0: 316.9. Samples: 82108. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:36,548][62782] Avg episode reward: [(0, '14.555')]
+[2023-06-21 10:47:38,429][69878] Updated weights for policy 0, policy_version 570 (0.0017)
+[2023-06-21 10:47:41,549][62782] Fps is (10 sec: 1638.3, 60 sec: 1297.0, 300 sec: 1206.5). Total num frames: 2338816. Throughput: 0: 317.0. Samples: 83060. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:41,554][62782] Avg episode reward: [(0, '14.423')]
+[2023-06-21 10:47:46,543][62782] Fps is (10 sec: 1229.2, 60 sec: 1297.2, 300 sec: 1199.6). Total num frames: 2342912. Throughput: 0: 317.3. Samples: 84970. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:46,547][62782] Avg episode reward: [(0, '13.978')]
+[2023-06-21 10:47:51,548][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1207.2). Total num frames: 2351104. Throughput: 0: 317.7. Samples: 86884. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:51,550][62782] Avg episode reward: [(0, '13.981')]
+[2023-06-21 10:47:56,547][62782] Fps is (10 sec: 1228.3, 60 sec: 1228.7, 300 sec: 1200.6). Total num frames: 2355200. Throughput: 0: 317.4. Samples: 87828. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:47:56,551][62782] Avg episode reward: [(0, '13.250')]
+[2023-06-21 10:48:01,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1297.1, 300 sec: 1208.0). Total num frames: 2363392. Throughput: 0: 317.6. Samples: 89736. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:01,549][62782] Avg episode reward: [(0, '13.169')]
+[2023-06-21 10:48:06,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1221.9). Total num frames: 2367488. Throughput: 0: 317.4. Samples: 91640. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:06,551][62782] Avg episode reward: [(0, '13.466')]
+[2023-06-21 10:48:10,617][69878] Updated weights for policy 0, policy_version 580 (0.0010)
+[2023-06-21 10:48:11,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1249.6). Total num frames: 2375680. Throughput: 0: 317.5. Samples: 92590. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:11,549][62782] Avg episode reward: [(0, '14.318')]
+[2023-06-21 10:48:16,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2379776. Throughput: 0: 317.5. Samples: 94492. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:16,552][62782] Avg episode reward: [(0, '14.602')]
+[2023-06-21 10:48:21,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2387968. Throughput: 0: 317.2. Samples: 96384. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:21,550][62782] Avg episode reward: [(0, '15.054')]
+[2023-06-21 10:48:26,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 2392064. Throughput: 0: 317.2. Samples: 97332. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:26,552][62782] Avg episode reward: [(0, '15.107')]
+[2023-06-21 10:48:31,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2400256. Throughput: 0: 316.8. Samples: 99226. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:31,551][62782] Avg episode reward: [(0, '14.284')]
+[2023-06-21 10:48:36,547][62782] Fps is (10 sec: 1638.4, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 2408448. Throughput: 0: 316.5. Samples: 101128. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:36,549][62782] Avg episode reward: [(0, '13.725')]
+[2023-06-21 10:48:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2412544. Throughput: 0: 316.8. Samples: 102084. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:41,550][62782] Avg episode reward: [(0, '14.637')]
+[2023-06-21 10:48:42,956][69878] Updated weights for policy 0, policy_version 590 (0.0014)
+[2023-06-21 10:48:46,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 2420736. Throughput: 0: 316.8. Samples: 103990. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:46,550][62782] Avg episode reward: [(0, '14.829')]
+[2023-06-21 10:48:51,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2424832. Throughput: 0: 316.4. Samples: 105878. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:51,549][62782] Avg episode reward: [(0, '15.085')]
+[2023-06-21 10:48:56,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2433024. Throughput: 0: 316.4. Samples: 106828. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:48:56,549][62782] Avg episode reward: [(0, '15.159')]
+[2023-06-21 10:49:01,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2437120. Throughput: 0: 316.4. Samples: 108728. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:01,551][62782] Avg episode reward: [(0, '14.737')]
+[2023-06-21 10:49:02,461][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000596_2441216.pth...
+[2023-06-21 10:49:02,560][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000522_2138112.pth
+[2023-06-21 10:49:06,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2445312. Throughput: 0: 316.5. Samples: 110626. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:06,550][62782] Avg episode reward: [(0, '14.679')]
+[2023-06-21 10:49:11,549][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2449408. Throughput: 0: 316.6. Samples: 111580. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:11,555][62782] Avg episode reward: [(0, '14.313')]
+[2023-06-21 10:49:15,282][69878] Updated weights for policy 0, policy_version 600 (0.0019)
+[2023-06-21 10:49:16,543][62782] Fps is (10 sec: 1229.3, 60 sec: 1297.2, 300 sec: 1263.5). Total num frames: 2457600. Throughput: 0: 316.9. Samples: 113486. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:16,545][62782] Avg episode reward: [(0, '13.993')]
+[2023-06-21 10:49:21,544][62782] Fps is (10 sec: 1229.2, 60 sec: 1228.9, 300 sec: 1263.5). Total num frames: 2461696. Throughput: 0: 317.1. Samples: 115398. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:21,548][62782] Avg episode reward: [(0, '14.569')]
+[2023-06-21 10:49:26,546][62782] Fps is (10 sec: 1228.4, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2469888. Throughput: 0: 317.1. Samples: 116352. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:26,549][62782] Avg episode reward: [(0, '14.951')]
+[2023-06-21 10:49:31,547][62782] Fps is (10 sec: 1637.9, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 2478080. Throughput: 0: 316.9. Samples: 118252. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:31,550][62782] Avg episode reward: [(0, '16.339')]
+[2023-06-21 10:49:31,552][69876] Saving new best policy, reward=16.339!
+[2023-06-21 10:49:36,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2482176. Throughput: 0: 317.4. Samples: 120160. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:36,551][62782] Avg episode reward: [(0, '16.978')]
+[2023-06-21 10:49:37,935][69876] Saving new best policy, reward=16.978!
+[2023-06-21 10:49:41,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2490368. Throughput: 0: 317.4. Samples: 121112. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:41,548][62782] Avg episode reward: [(0, '17.549')]
+[2023-06-21 10:49:41,551][69876] Saving new best policy, reward=17.549!
+[2023-06-21 10:49:46,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2494464. Throughput: 0: 317.8. Samples: 123028. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:46,548][62782] Avg episode reward: [(0, '17.416')]
+[2023-06-21 10:49:47,492][69878] Updated weights for policy 0, policy_version 610 (0.0014)
+[2023-06-21 10:49:51,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 2502656. Throughput: 0: 317.9. Samples: 124932. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:51,551][62782] Avg episode reward: [(0, '17.085')]
+[2023-06-21 10:49:56,551][62782] Fps is (10 sec: 1228.2, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2506752. Throughput: 0: 317.9. Samples: 125888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:49:56,568][62782] Avg episode reward: [(0, '16.973')]
+[2023-06-21 10:50:01,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2514944. Throughput: 0: 317.7. Samples: 127784. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:01,551][62782] Avg episode reward: [(0, '17.665')]
+[2023-06-21 10:50:01,555][69876] Saving new best policy, reward=17.665!
+[2023-06-21 10:50:06,547][62782] Fps is (10 sec: 1229.2, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2519040. Throughput: 0: 317.5. Samples: 129686. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:06,551][62782] Avg episode reward: [(0, '17.936')]
+[2023-06-21 10:50:06,935][69876] Saving new best policy, reward=17.936!
+[2023-06-21 10:50:11,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 2527232. Throughput: 0: 317.6. Samples: 130644. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:11,549][62782] Avg episode reward: [(0, '18.680')]
+[2023-06-21 10:50:13,312][69876] Saving new best policy, reward=18.680!
+[2023-06-21 10:50:16,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2531328. Throughput: 0: 317.8. Samples: 132554. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:16,554][62782] Avg episode reward: [(0, '19.383')]
+[2023-06-21 10:50:16,567][69876] Saving new best policy, reward=19.383!
+[2023-06-21 10:50:19,683][69878] Updated weights for policy 0, policy_version 620 (0.0019)
+[2023-06-21 10:50:21,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 2539520. Throughput: 0: 317.8. Samples: 134460. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:21,549][62782] Avg episode reward: [(0, '19.876')]
+[2023-06-21 10:50:23,101][69876] Saving new best policy, reward=19.876!
+[2023-06-21 10:50:26,547][62782] Fps is (10 sec: 1638.5, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 2547712. Throughput: 0: 317.6. Samples: 135404. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:26,549][62782] Avg episode reward: [(0, '20.443')]
+[2023-06-21 10:50:26,551][69876] Saving new best policy, reward=20.443!
+[2023-06-21 10:50:31,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 2551808. Throughput: 0: 317.3. Samples: 137308. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:31,548][62782] Avg episode reward: [(0, '20.653')]
+[2023-06-21 10:50:32,632][69876] Saving new best policy, reward=20.653!
+[2023-06-21 10:50:36,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 2560000. Throughput: 0: 317.3. Samples: 139212. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:36,549][62782] Avg episode reward: [(0, '20.839')]
+[2023-06-21 10:50:36,551][69876] Saving new best policy, reward=20.839!
+[2023-06-21 10:50:41,552][62782] Fps is (10 sec: 1228.0, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2564096. Throughput: 0: 317.3. Samples: 140168. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:41,561][62782] Avg episode reward: [(0, '21.224')]
+[2023-06-21 10:50:42,245][69876] Saving new best policy, reward=21.224!
+[2023-06-21 10:50:46,551][62782] Fps is (10 sec: 1228.3, 60 sec: 1296.9, 300 sec: 1277.4). Total num frames: 2572288. Throughput: 0: 313.5. Samples: 141894. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:50:46,554][62782] Avg episode reward: [(0, '20.846')]
+[2023-06-21 10:50:51,550][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2576384. Throughput: 0: 308.8. Samples: 143584. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:50:51,553][62782] Avg episode reward: [(0, '21.901')]
+[2023-06-21 10:50:53,407][69876] Saving new best policy, reward=21.901!
+[2023-06-21 10:50:53,424][69878] Updated weights for policy 0, policy_version 630 (0.0020)
+[2023-06-21 10:50:56,552][62782] Fps is (10 sec: 819.2, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 2580480. Throughput: 0: 306.5. Samples: 144436. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:50:56,556][62782] Avg episode reward: [(0, '21.199')]
+[2023-06-21 10:51:01,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2588672. Throughput: 0: 301.7. Samples: 146134. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 10:51:01,556][62782] Avg episode reward: [(0, '21.296')]
+[2023-06-21 10:51:01,560][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000632_2588672.pth...
+[2023-06-21 10:51:01,641][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000559_2289664.pth
+[2023-06-21 10:51:06,550][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 2592768. Throughput: 0: 297.2. Samples: 147836. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:06,553][62782] Avg episode reward: [(0, '21.226')]
+[2023-06-21 10:51:11,549][62782] Fps is (10 sec: 1229.1, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 2600960. Throughput: 0: 295.3. Samples: 148692. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:11,551][62782] Avg episode reward: [(0, '21.433')]
+[2023-06-21 10:51:16,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 2605056. Throughput: 0: 290.8. Samples: 150394. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:16,555][62782] Avg episode reward: [(0, '21.801')]
+[2023-06-21 10:51:21,551][62782] Fps is (10 sec: 819.0, 60 sec: 1160.5, 300 sec: 1249.6). Total num frames: 2609152. Throughput: 0: 286.4. Samples: 152100. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:21,555][62782] Avg episode reward: [(0, '22.042')]
+[2023-06-21 10:51:22,181][69876] Saving new best policy, reward=22.042!
+[2023-06-21 10:51:26,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.4, 300 sec: 1249.6). Total num frames: 2617344. Throughput: 0: 284.0. Samples: 152948. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:26,555][62782] Avg episode reward: [(0, '21.661')]
+[2023-06-21 10:51:29,369][69878] Updated weights for policy 0, policy_version 640 (0.0020)
+[2023-06-21 10:51:31,560][62782] Fps is (10 sec: 1227.8, 60 sec: 1160.3, 300 sec: 1249.6). Total num frames: 2621440. Throughput: 0: 283.2. Samples: 154640. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:31,562][62782] Avg episode reward: [(0, '21.471')]
+[2023-06-21 10:51:36,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.4, 300 sec: 1249.6). Total num frames: 2629632. Throughput: 0: 283.3. Samples: 156332. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:36,555][62782] Avg episode reward: [(0, '20.507')]
+[2023-06-21 10:51:41,551][62782] Fps is (10 sec: 1229.8, 60 sec: 1160.6, 300 sec: 1249.6). Total num frames: 2633728. Throughput: 0: 283.3. Samples: 157186. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:41,554][62782] Avg episode reward: [(0, '20.398')]
+[2023-06-21 10:51:46,553][62782] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1235.7). Total num frames: 2637824. Throughput: 0: 283.2. Samples: 158880. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:46,558][62782] Avg episode reward: [(0, '19.976')]
+[2023-06-21 10:51:51,552][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1235.7). Total num frames: 2646016. Throughput: 0: 282.9. Samples: 160568. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:51:51,555][62782] Avg episode reward: [(0, '19.555')]
+[2023-06-21 10:51:56,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1235.7). Total num frames: 2650112. Throughput: 0: 282.7. Samples: 161414. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:51:56,553][62782] Avg episode reward: [(0, '19.896')]
+[2023-06-21 10:52:01,553][62782] Fps is (10 sec: 819.2, 60 sec: 1092.2, 300 sec: 1221.8). Total num frames: 2654208. Throughput: 0: 282.7. Samples: 163116. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:01,557][62782] Avg episode reward: [(0, '19.914')]
+[2023-06-21 10:52:05,375][69878] Updated weights for policy 0, policy_version 650 (0.0022)
+[2023-06-21 10:52:06,552][62782] Fps is (10 sec: 1228.5, 60 sec: 1160.5, 300 sec: 1235.7). Total num frames: 2662400. Throughput: 0: 282.5. Samples: 164814. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:06,553][62782] Avg episode reward: [(0, '19.427')]
+[2023-06-21 10:52:11,551][62782] Fps is (10 sec: 1229.0, 60 sec: 1092.2, 300 sec: 1221.8). Total num frames: 2666496. Throughput: 0: 282.4. Samples: 165656. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:11,554][62782] Avg episode reward: [(0, '19.080')]
+[2023-06-21 10:52:16,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1235.7). Total num frames: 2674688. Throughput: 0: 282.6. Samples: 167354. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:52:16,552][62782] Avg episode reward: [(0, '19.476')]
+[2023-06-21 10:52:21,550][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1221.8). Total num frames: 2678784. Throughput: 0: 282.7. Samples: 169052. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:52:21,553][62782] Avg episode reward: [(0, '19.372')]
+[2023-06-21 10:52:26,552][62782] Fps is (10 sec: 819.0, 60 sec: 1092.3, 300 sec: 1221.8). Total num frames: 2682880. Throughput: 0: 282.6. Samples: 169902. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:26,561][62782] Avg episode reward: [(0, '19.504')]
+[2023-06-21 10:52:31,552][62782] Fps is (10 sec: 1228.5, 60 sec: 1160.7, 300 sec: 1221.8). Total num frames: 2691072. Throughput: 0: 282.7. Samples: 171602. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:31,555][62782] Avg episode reward: [(0, '19.435')]
+[2023-06-21 10:52:36,553][62782] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1208.0). Total num frames: 2695168. Throughput: 0: 282.9. Samples: 173300. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:36,556][62782] Avg episode reward: [(0, '18.693')]
+[2023-06-21 10:52:41,550][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1207.9). Total num frames: 2699264. Throughput: 0: 283.2. Samples: 174156. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:41,554][62782] Avg episode reward: [(0, '18.573')]
+[2023-06-21 10:52:41,641][69878] Updated weights for policy 0, policy_version 660 (0.0025)
+[2023-06-21 10:52:46,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1208.0). Total num frames: 2707456. Throughput: 0: 282.9. Samples: 175846. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:52:46,553][62782] Avg episode reward: [(0, '18.520')]
+[2023-06-21 10:52:51,551][62782] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1208.0). Total num frames: 2711552. Throughput: 0: 282.8. Samples: 177540. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:52:51,555][62782] Avg episode reward: [(0, '18.981')]
+[2023-06-21 10:52:56,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1207.9). Total num frames: 2719744. Throughput: 0: 282.7. Samples: 178380. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:52:56,555][62782] Avg episode reward: [(0, '18.666')]
+[2023-06-21 10:53:01,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1208.0). Total num frames: 2723840. Throughput: 0: 282.8. Samples: 180082. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:53:01,554][62782] Avg episode reward: [(0, '18.352')]
+[2023-06-21 10:53:03,411][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000666_2727936.pth...
+[2023-06-21 10:53:03,497][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000596_2441216.pth
+[2023-06-21 10:53:06,553][62782] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1194.1). Total num frames: 2727936. Throughput: 0: 283.0. Samples: 181790. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:53:06,557][62782] Avg episode reward: [(0, '18.326')]
+[2023-06-21 10:53:11,552][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1208.0). Total num frames: 2736128. Throughput: 0: 283.1. Samples: 182640. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:53:11,556][62782] Avg episode reward: [(0, '18.556')]
+[2023-06-21 10:53:16,544][62782] Fps is (10 sec: 1229.9, 60 sec: 1092.4, 300 sec: 1194.1). Total num frames: 2740224. Throughput: 0: 283.2. Samples: 184342. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:16,547][62782] Avg episode reward: [(0, '18.499')]
+[2023-06-21 10:53:17,672][69878] Updated weights for policy 0, policy_version 670 (0.0020)
+[2023-06-21 10:53:21,550][62782] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1208.0). Total num frames: 2748416. Throughput: 0: 283.1. Samples: 186040. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:21,552][62782] Avg episode reward: [(0, '18.956')]
+[2023-06-21 10:53:26,552][62782] Fps is (10 sec: 1227.7, 60 sec: 1160.5, 300 sec: 1194.1). Total num frames: 2752512. Throughput: 0: 282.8. Samples: 186882. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:26,557][62782] Avg episode reward: [(0, '19.680')]
+[2023-06-21 10:53:31,552][62782] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1180.2). Total num frames: 2756608. Throughput: 0: 283.0. Samples: 188580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:31,556][62782] Avg episode reward: [(0, '19.620')]
+[2023-06-21 10:53:36,552][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1194.1). Total num frames: 2764800. Throughput: 0: 282.8. Samples: 190266. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:36,556][62782] Avg episode reward: [(0, '19.543')]
+[2023-06-21 10:53:41,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 2768896. Throughput: 0: 283.1. Samples: 191118. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:41,563][62782] Avg episode reward: [(0, '19.523')]
+[2023-06-21 10:53:46,545][62782] Fps is (10 sec: 819.7, 60 sec: 1092.4, 300 sec: 1180.2). Total num frames: 2772992. Throughput: 0: 283.0. Samples: 192816. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:46,550][62782] Avg episode reward: [(0, '19.102')]
+[2023-06-21 10:53:51,551][62782] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 2781184. Throughput: 0: 282.8. Samples: 194516. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:51,553][62782] Avg episode reward: [(0, '19.450')]
+[2023-06-21 10:53:53,900][69878] Updated weights for policy 0, policy_version 680 (0.0019)
+[2023-06-21 10:53:56,553][62782] Fps is (10 sec: 1227.9, 60 sec: 1092.3, 300 sec: 1180.2). Total num frames: 2785280. Throughput: 0: 282.8. Samples: 195364. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:53:56,554][62782] Avg episode reward: [(0, '20.058')]
+[2023-06-21 10:54:01,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 2793472. Throughput: 0: 282.5. Samples: 197058. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:01,555][62782] Avg episode reward: [(0, '20.568')]
+[2023-06-21 10:54:06,552][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 2797568. Throughput: 0: 282.5. Samples: 198754. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:06,555][62782] Avg episode reward: [(0, '20.499')]
+[2023-06-21 10:54:11,550][62782] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1166.3). Total num frames: 2801664. Throughput: 0: 282.6. Samples: 199600. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:11,554][62782] Avg episode reward: [(0, '20.412')]
+[2023-06-21 10:54:16,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.4, 300 sec: 1180.2). Total num frames: 2809856. Throughput: 0: 282.8. Samples: 201304. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:16,556][62782] Avg episode reward: [(0, '20.391')]
+[2023-06-21 10:54:21,550][62782] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1166.3). Total num frames: 2813952. Throughput: 0: 283.3. Samples: 203012. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:21,554][62782] Avg episode reward: [(0, '20.677')]
+[2023-06-21 10:54:26,552][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1166.3). Total num frames: 2822144. Throughput: 0: 283.2. Samples: 203864. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:26,556][62782] Avg episode reward: [(0, '20.792')]
+[2023-06-21 10:54:30,185][69878] Updated weights for policy 0, policy_version 690 (0.0018)
+[2023-06-21 10:54:31,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1166.3). Total num frames: 2826240. Throughput: 0: 283.2. Samples: 205560. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:31,552][62782] Avg episode reward: [(0, '22.092')]
+[2023-06-21 10:54:31,558][69876] Saving new best policy, reward=22.092!
+[2023-06-21 10:54:36,551][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1152.4). Total num frames: 2830336. Throughput: 0: 283.1. Samples: 207254. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-06-21 10:54:36,556][62782] Avg episode reward: [(0, '21.812')]
+[2023-06-21 10:54:41,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1166.3). Total num frames: 2838528. Throughput: 0: 283.0. Samples: 208100. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:54:41,555][62782] Avg episode reward: [(0, '21.712')]
+[2023-06-21 10:54:46,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.4, 300 sec: 1152.4). Total num frames: 2842624. Throughput: 0: 282.9. Samples: 209788. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:54:46,557][62782] Avg episode reward: [(0, '21.706')]
+[2023-06-21 10:54:51,550][62782] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1152.4). Total num frames: 2846720. Throughput: 0: 283.2. Samples: 211498. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:54:51,554][62782] Avg episode reward: [(0, '21.992')]
+[2023-06-21 10:54:56,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1152.4). Total num frames: 2854912. Throughput: 0: 283.5. Samples: 212358. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:54:56,553][62782] Avg episode reward: [(0, '22.056')]
+[2023-06-21 10:55:01,552][62782] Fps is (10 sec: 1228.5, 60 sec: 1092.3, 300 sec: 1152.4). Total num frames: 2859008. Throughput: 0: 283.4. Samples: 214056. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:01,555][62782] Avg episode reward: [(0, '20.985')]
+[2023-06-21 10:55:02,963][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000699_2863104.pth...
+[2023-06-21 10:55:03,059][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000632_2588672.pth
+[2023-06-21 10:55:06,552][62782] Fps is (10 sec: 819.0, 60 sec: 1092.3, 300 sec: 1138.5). Total num frames: 2863104. Throughput: 0: 283.1. Samples: 215754. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:06,554][62782] Avg episode reward: [(0, '21.373')]
+[2023-06-21 10:55:06,586][69878] Updated weights for policy 0, policy_version 700 (0.0029)
+[2023-06-21 10:55:11,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1152.4). Total num frames: 2871296. Throughput: 0: 283.2. Samples: 216606. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:11,554][62782] Avg episode reward: [(0, '21.717')]
+[2023-06-21 10:55:16,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1138.5). Total num frames: 2875392. Throughput: 0: 283.4. Samples: 218312. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:16,555][62782] Avg episode reward: [(0, '21.310')]
+[2023-06-21 10:55:21,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 2883584. Throughput: 0: 283.6. Samples: 220014. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:21,554][62782] Avg episode reward: [(0, '21.572')]
+[2023-06-21 10:55:26,553][62782] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1138.5). Total num frames: 2887680. Throughput: 0: 283.4. Samples: 220852. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:26,560][62782] Avg episode reward: [(0, '21.109')]
+[2023-06-21 10:55:31,552][62782] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 2891776. Throughput: 0: 283.6. Samples: 222552. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:31,557][62782] Avg episode reward: [(0, '21.725')]
+[2023-06-21 10:55:36,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 2899968. Throughput: 0: 283.6. Samples: 224258. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:36,553][62782] Avg episode reward: [(0, '22.063')]
+[2023-06-21 10:55:41,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 2904064. Throughput: 0: 283.4. Samples: 225110. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:41,554][62782] Avg episode reward: [(0, '22.322')]
+[2023-06-21 10:55:42,724][69876] Saving new best policy, reward=22.322!
+[2023-06-21 10:55:42,727][69878] Updated weights for policy 0, policy_version 710 (0.0017)
+[2023-06-21 10:55:46,550][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1138.5). Total num frames: 2912256. Throughput: 0: 283.4. Samples: 226808. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:46,552][62782] Avg episode reward: [(0, '21.857')]
+[2023-06-21 10:55:51,550][62782] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 2916352. Throughput: 0: 283.7. Samples: 228520. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:51,553][62782] Avg episode reward: [(0, '22.129')]
+[2023-06-21 10:55:56,549][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 2920448. Throughput: 0: 283.6. Samples: 229366. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:55:56,552][62782] Avg episode reward: [(0, '22.348')]
+[2023-06-21 10:55:57,027][69876] Saving new best policy, reward=22.348!
+[2023-06-21 10:56:01,551][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 2928640. Throughput: 0: 283.6. Samples: 231074. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:56:01,555][62782] Avg episode reward: [(0, '22.195')]
+[2023-06-21 10:56:06,550][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 2932736. Throughput: 0: 283.6. Samples: 232778. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:56:06,551][62782] Avg episode reward: [(0, '23.129')]
+[2023-06-21 10:56:07,939][69876] Saving new best policy, reward=23.129!
+[2023-06-21 10:56:11,552][62782] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 2936832. Throughput: 0: 283.7. Samples: 233618. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:56:11,555][62782] Avg episode reward: [(0, '22.786')]
+[2023-06-21 10:56:16,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 2945024. Throughput: 0: 283.6. Samples: 235314. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:56:16,554][62782] Avg episode reward: [(0, '22.305')]
+[2023-06-21 10:56:18,769][69878] Updated weights for policy 0, policy_version 720 (0.0014)
+[2023-06-21 10:56:21,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 2949120. Throughput: 0: 283.7. Samples: 237026. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 10:56:21,552][62782] Avg episode reward: [(0, '21.782')]
+[2023-06-21 10:56:26,546][62782] Fps is (10 sec: 1229.5, 60 sec: 1160.7, 300 sec: 1138.6). Total num frames: 2957312. Throughput: 0: 283.6. Samples: 237872.
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:26,549][62782] Avg episode reward: [(0, '22.127')] +[2023-06-21 10:56:31,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 2961408. Throughput: 0: 283.6. Samples: 239572. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:31,553][62782] Avg episode reward: [(0, '21.791')] +[2023-06-21 10:56:36,552][62782] Fps is (10 sec: 818.7, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 2965504. Throughput: 0: 283.5. Samples: 241276. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:36,555][62782] Avg episode reward: [(0, '21.149')] +[2023-06-21 10:56:41,552][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 2973696. Throughput: 0: 283.5. Samples: 242126. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:41,553][62782] Avg episode reward: [(0, '20.864')] +[2023-06-21 10:56:46,550][62782] Fps is (10 sec: 1229.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 2977792. Throughput: 0: 283.4. Samples: 243826. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:46,553][62782] Avg episode reward: [(0, '20.781')] +[2023-06-21 10:56:51,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 2985984. Throughput: 0: 283.2. Samples: 245522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:51,553][62782] Avg episode reward: [(0, '20.836')] +[2023-06-21 10:56:54,865][69878] Updated weights for policy 0, policy_version 730 (0.0009) +[2023-06-21 10:56:56,551][62782] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 2990080. Throughput: 0: 283.7. Samples: 246384. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:56:56,553][62782] Avg episode reward: [(0, '21.214')] +[2023-06-21 10:57:01,572][62782] Fps is (10 sec: 817.6, 60 sec: 1091.9, 300 sec: 1124.6). Total num frames: 2994176. Throughput: 0: 283.2. Samples: 248062. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:01,579][62782] Avg episode reward: [(0, '20.883')] +[2023-06-21 10:57:02,281][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000732_2998272.pth... +[2023-06-21 10:57:02,372][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000666_2727936.pth +[2023-06-21 10:57:06,552][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 3002368. Throughput: 0: 283.2. Samples: 249768. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:06,554][62782] Avg episode reward: [(0, '21.140')] +[2023-06-21 10:57:11,551][62782] Fps is (10 sec: 1231.1, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 3006464. Throughput: 0: 283.3. Samples: 250624. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:11,553][62782] Avg episode reward: [(0, '20.747')] +[2023-06-21 10:57:16,551][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3010560. Throughput: 0: 283.1. Samples: 252312. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:16,553][62782] Avg episode reward: [(0, '20.341')] +[2023-06-21 10:57:21,552][62782] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 3018752. Throughput: 0: 283.1. Samples: 254016. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:21,553][62782] Avg episode reward: [(0, '19.390')] +[2023-06-21 10:57:26,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 3022848. Throughput: 0: 283.0. Samples: 254860. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 10:57:26,561][62782] Avg episode reward: [(0, '19.129')] +[2023-06-21 10:57:31,257][69878] Updated weights for policy 0, policy_version 740 (0.0010) +[2023-06-21 10:57:31,551][62782] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 3031040. Throughput: 0: 283.0. Samples: 256562. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:31,552][62782] Avg episode reward: [(0, '19.170')] +[2023-06-21 10:57:36,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 3035136. Throughput: 0: 283.1. Samples: 258264. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:36,554][62782] Avg episode reward: [(0, '20.397')] +[2023-06-21 10:57:41,552][62782] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3039232. Throughput: 0: 282.6. Samples: 259100. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:41,556][62782] Avg episode reward: [(0, '20.716')] +[2023-06-21 10:57:46,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1138.6). Total num frames: 3047424. Throughput: 0: 283.4. Samples: 260808. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:46,554][62782] Avg episode reward: [(0, '20.711')] +[2023-06-21 10:57:51,551][62782] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3051520. Throughput: 0: 283.1. Samples: 262508. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:51,561][62782] Avg episode reward: [(0, '20.364')] +[2023-06-21 10:57:56,550][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3055616. Throughput: 0: 283.0. Samples: 263358. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:57:56,551][62782] Avg episode reward: [(0, '21.127')] +[2023-06-21 10:58:01,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.9, 300 sec: 1138.6). Total num frames: 3063808. Throughput: 0: 283.2. Samples: 265056. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:01,555][62782] Avg episode reward: [(0, '21.273')] +[2023-06-21 10:58:06,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3067904. Throughput: 0: 283.4. Samples: 266768. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:06,555][62782] Avg episode reward: [(0, '21.392')] +[2023-06-21 10:58:07,439][69878] Updated weights for policy 0, policy_version 750 (0.0010) +[2023-06-21 10:58:11,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1138.5). Total num frames: 3076096. Throughput: 0: 283.6. Samples: 267624. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:11,554][62782] Avg episode reward: [(0, '21.163')] +[2023-06-21 10:58:16,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 3080192. Throughput: 0: 283.6. Samples: 269322. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:16,554][62782] Avg episode reward: [(0, '21.027')] +[2023-06-21 10:58:21,551][62782] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3084288. Throughput: 0: 283.7. Samples: 271028. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:21,552][62782] Avg episode reward: [(0, '21.069')] +[2023-06-21 10:58:26,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3092480. Throughput: 0: 284.0. Samples: 271880. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:26,553][62782] Avg episode reward: [(0, '20.873')] +[2023-06-21 10:58:31,552][62782] Fps is (10 sec: 1228.6, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 3096576. Throughput: 0: 283.6. Samples: 273572. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:31,553][62782] Avg episode reward: [(0, '20.527')] +[2023-06-21 10:58:36,547][62782] Fps is (10 sec: 1229.3, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3104768. Throughput: 0: 286.3. Samples: 275392. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 10:58:36,551][62782] Avg episode reward: [(0, '20.999')] +[2023-06-21 10:58:41,547][62782] Fps is (10 sec: 1229.5, 60 sec: 1160.6, 300 sec: 1138.5). Total num frames: 3108864. Throughput: 0: 288.6. Samples: 276342. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:58:41,549][62782] Avg episode reward: [(0, '22.291')] +[2023-06-21 10:58:42,862][69878] Updated weights for policy 0, policy_version 760 (0.0013) +[2023-06-21 10:58:46,553][62782] Fps is (10 sec: 818.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3112960. Throughput: 0: 293.2. Samples: 278248. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:58:46,566][62782] Avg episode reward: [(0, '22.520')] +[2023-06-21 10:58:48,381][69876] Signal inference workers to stop experience collection... +[2023-06-21 10:58:48,413][69878] InferenceWorker_p0-w0: stopping experience collection +[2023-06-21 10:58:49,289][69876] Signal inference workers to resume experience collection... +[2023-06-21 10:58:49,289][69878] InferenceWorker_p0-w0: resuming experience collection +[2023-06-21 10:58:51,548][62782] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 3117056. Throughput: 0: 277.9. Samples: 279274. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:58:51,552][62782] Avg episode reward: [(0, '22.372')] +[2023-06-21 10:58:56,545][62782] Fps is (10 sec: 1229.6, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 3125248. Throughput: 0: 279.7. Samples: 280208. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-21 10:58:56,548][62782] Avg episode reward: [(0, '22.662')] +[2023-06-21 10:59:01,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1092.4, 300 sec: 1124.7). Total num frames: 3129344. Throughput: 0: 283.9. Samples: 282098. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-21 10:59:01,552][62782] Avg episode reward: [(0, '22.225')] +[2023-06-21 10:59:01,752][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000765_3133440.pth... +[2023-06-21 10:59:01,850][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000699_2863104.pth +[2023-06-21 10:59:06,547][62782] Fps is (10 sec: 1228.5, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3137536. Throughput: 0: 287.9. Samples: 283984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-21 10:59:06,551][62782] Avg episode reward: [(0, '21.365')] +[2023-06-21 10:59:11,548][62782] Fps is (10 sec: 1638.2, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3145728. Throughput: 0: 290.1. Samples: 284932. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:11,551][62782] Avg episode reward: [(0, '21.488')] +[2023-06-21 10:59:16,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3149824. Throughput: 0: 294.7. Samples: 286834. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:16,549][62782] Avg episode reward: [(0, '22.251')] +[2023-06-21 10:59:18,006][69878] Updated weights for policy 0, policy_version 770 (0.0528) +[2023-06-21 10:59:21,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.9, 300 sec: 1138.6). Total num frames: 3158016. Throughput: 0: 296.5. Samples: 288734. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:21,548][62782] Avg episode reward: [(0, '22.933')] +[2023-06-21 10:59:26,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3162112. Throughput: 0: 296.6. Samples: 289690. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:26,548][62782] Avg episode reward: [(0, '23.016')] +[2023-06-21 10:59:31,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.9, 300 sec: 1152.5). Total num frames: 3170304. Throughput: 0: 296.6. Samples: 291592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:31,549][62782] Avg episode reward: [(0, '23.764')] +[2023-06-21 10:59:31,553][69876] Saving new best policy, reward=23.764! +[2023-06-21 10:59:36,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 3174400. Throughput: 0: 315.9. Samples: 293488. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:36,548][62782] Avg episode reward: [(0, '24.120')] +[2023-06-21 10:59:37,426][69876] Saving new best policy, reward=24.120! +[2023-06-21 10:59:41,546][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1152.5). Total num frames: 3182592. Throughput: 0: 316.2. Samples: 294438. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:41,550][62782] Avg episode reward: [(0, '24.346')] +[2023-06-21 10:59:41,553][69876] Saving new best policy, reward=24.346! +[2023-06-21 10:59:46,554][62782] Fps is (10 sec: 1227.7, 60 sec: 1228.8, 300 sec: 1152.4). Total num frames: 3186688. Throughput: 0: 316.3. Samples: 296336. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-21 10:59:46,565][62782] Avg episode reward: [(0, '25.125')] +[2023-06-21 10:59:47,318][69876] Saving new best policy, reward=25.125! +[2023-06-21 10:59:50,487][69878] Updated weights for policy 0, policy_version 780 (0.0010) +[2023-06-21 10:59:51,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1297.1, 300 sec: 1152.5). Total num frames: 3194880. Throughput: 0: 316.7. Samples: 298234. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-21 10:59:51,547][62782] Avg episode reward: [(0, '25.195')] +[2023-06-21 10:59:53,654][69876] Saving new best policy, reward=25.195! +[2023-06-21 10:59:56,548][62782] Fps is (10 sec: 1229.5, 60 sec: 1228.7, 300 sec: 1152.4). Total num frames: 3198976. Throughput: 0: 316.7. Samples: 299184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-21 10:59:56,550][62782] Avg episode reward: [(0, '25.239')] +[2023-06-21 10:59:57,166][69876] Saving new best policy, reward=25.239! +[2023-06-21 11:00:01,546][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1166.3). Total num frames: 3207168. Throughput: 0: 316.5. Samples: 301076. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-21 11:00:01,549][62782] Avg episode reward: [(0, '25.425')] +[2023-06-21 11:00:03,708][69876] Saving new best policy, reward=25.425! 
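
The progress lines above follow a fixed format ("Fps is (10 sec: ..., 60 sec: ..., 300 sec: ...). Total num frames: ..."), which makes them easy to extract and chart. A minimal parsing sketch, assuming the log is available locally as sf_log.txt; the path and function name are illustrative, not part of the run:

    import re

    # Mirrors the observed progress-line format; search() also tolerates
    # the leading "+" diff marker on each line.
    FPS_RE = re.compile(
        r"\[(?P<ts>[\d\- :,]+)\]\[\d+\] Fps is \(10 sec: (?P<fps10>[\d.]+), "
        r"60 sec: (?P<fps60>[\d.]+), 300 sec: (?P<fps300>[\d.]+)\). "
        r"Total num frames: (?P<frames>\d+)"
    )

    def iter_progress(path="sf_log.txt"):
        """Yield (timestamp, 10-second fps, total frames) for each progress line."""
        with open(path) as f:
            for line in f:
                m = FPS_RE.search(line)
                if m:
                    yield m.group("ts"), float(m.group("fps10")), int(m.group("frames"))
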
+[2023-06-21 11:00:06,549][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1152.4). Total num frames: 3211264. Throughput: 0: 316.3. Samples: 302968. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:06,556][62782] Avg episode reward: [(0, '24.274')]
+[2023-06-21 11:00:11,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1166.3). Total num frames: 3219456. Throughput: 0: 316.1. Samples: 303914. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:11,548][62782] Avg episode reward: [(0, '24.430')]
+[2023-06-21 11:00:16,547][62782] Fps is (10 sec: 1638.6, 60 sec: 1297.1, 300 sec: 1166.3). Total num frames: 3227648. Throughput: 0: 316.0. Samples: 305814. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:16,550][62782] Avg episode reward: [(0, '24.674')]
+[2023-06-21 11:00:21,546][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1166.3). Total num frames: 3231744. Throughput: 0: 316.2. Samples: 307716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:21,548][62782] Avg episode reward: [(0, '24.861')]
+[2023-06-21 11:00:23,166][69878] Updated weights for policy 0, policy_version 790 (0.0015)
+[2023-06-21 11:00:26,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1180.2). Total num frames: 3239936. Throughput: 0: 316.2. Samples: 308666. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:26,565][62782] Avg episode reward: [(0, '24.870')]
+[2023-06-21 11:00:31,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1166.3). Total num frames: 3244032. Throughput: 0: 316.3. Samples: 310566. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:31,551][62782] Avg episode reward: [(0, '25.250')]
+[2023-06-21 11:00:36,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.0, 300 sec: 1180.2). Total num frames: 3252224. Throughput: 0: 316.5. Samples: 312478. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:36,549][62782] Avg episode reward: [(0, '25.525')]
+[2023-06-21 11:00:36,551][69876] Saving new best policy, reward=25.525!
+[2023-06-21 11:00:41,550][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1166.3). Total num frames: 3256320. Throughput: 0: 316.7. Samples: 313436. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:41,565][62782] Avg episode reward: [(0, '25.453')]
+[2023-06-21 11:00:46,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.2, 300 sec: 1180.2). Total num frames: 3264512. Throughput: 0: 316.8. Samples: 315334. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:46,550][62782] Avg episode reward: [(0, '26.068')]
+[2023-06-21 11:00:46,551][69876] Saving new best policy, reward=26.068!
+[2023-06-21 11:00:51,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1180.2). Total num frames: 3268608. Throughput: 0: 317.2. Samples: 317240. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:51,551][62782] Avg episode reward: [(0, '26.259')]
+[2023-06-21 11:00:52,005][69876] Saving new best policy, reward=26.259!
+[2023-06-21 11:00:55,180][69878] Updated weights for policy 0, policy_version 800 (0.0013)
+[2023-06-21 11:00:56,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1180.2). Total num frames: 3276800. Throughput: 0: 317.3. Samples: 318194. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:00:56,549][62782] Avg episode reward: [(0, '26.044')]
+[2023-06-21 11:01:01,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1180.2). Total num frames: 3280896. Throughput: 0: 317.6. Samples: 320104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:01,550][62782] Avg episode reward: [(0, '25.882')]
+[2023-06-21 11:01:01,704][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000802_3284992.pth...
+[2023-06-21 11:01:01,774][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000732_2998272.pth
+[2023-06-21 11:01:06,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1194.1). Total num frames: 3289088. Throughput: 0: 317.5. Samples: 322002. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:06,549][62782] Avg episode reward: [(0, '25.407')]
+[2023-06-21 11:01:11,547][62782] Fps is (10 sec: 1638.4, 60 sec: 1297.0, 300 sec: 1194.1). Total num frames: 3297280. Throughput: 0: 317.2. Samples: 322938. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:11,550][62782] Avg episode reward: [(0, '26.564')]
+[2023-06-21 11:01:11,553][69876] Saving new best policy, reward=26.564!
+[2023-06-21 11:01:16,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1194.1). Total num frames: 3301376. Throughput: 0: 317.2. Samples: 324838. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:16,551][62782] Avg episode reward: [(0, '25.871')]
+[2023-06-21 11:01:21,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1194.1). Total num frames: 3309568. Throughput: 0: 317.1. Samples: 326750. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:21,551][62782] Avg episode reward: [(0, '26.101')]
+[2023-06-21 11:01:26,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1194.1). Total num frames: 3313664. Throughput: 0: 317.0. Samples: 327700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:26,548][62782] Avg episode reward: [(0, '25.732')]
+[2023-06-21 11:01:27,531][69878] Updated weights for policy 0, policy_version 810 (0.0012)
+[2023-06-21 11:01:31,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1208.0). Total num frames: 3321856. Throughput: 0: 317.1. Samples: 329604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:31,549][62782] Avg episode reward: [(0, '25.513')]
+[2023-06-21 11:01:36,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1194.1). Total num frames: 3325952. Throughput: 0: 317.0. Samples: 331504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:36,557][62782] Avg episode reward: [(0, '25.715')]
+[2023-06-21 11:01:41,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1208.0). Total num frames: 3334144. Throughput: 0: 316.8. Samples: 332448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:41,549][62782] Avg episode reward: [(0, '24.516')]
+[2023-06-21 11:01:46,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1194.1). Total num frames: 3338240. Throughput: 0: 316.8. Samples: 334358. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:46,551][62782] Avg episode reward: [(0, '24.636')]
+[2023-06-21 11:01:51,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1208.0). Total num frames: 3346432. Throughput: 0: 316.9. Samples: 336262. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:51,548][62782] Avg episode reward: [(0, '24.311')]
+[2023-06-21 11:01:56,566][62782] Fps is (10 sec: 1637.1, 60 sec: 1296.9, 300 sec: 1221.9). Total num frames: 3354624. Throughput: 0: 317.2. Samples: 337214. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:01:56,650][62782] Avg episode reward: [(0, '24.010')]
+[2023-06-21 11:02:00,281][69878] Updated weights for policy 0, policy_version 820 (0.0011)
+[2023-06-21 11:02:01,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1208.0). Total num frames: 3358720. Throughput: 0: 316.5. Samples: 339080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:02:01,551][62782] Avg episode reward: [(0, '23.840')]
+[2023-06-21 11:02:06,547][62782] Fps is (10 sec: 1229.7, 60 sec: 1297.1, 300 sec: 1221.9). Total num frames: 3366912. Throughput: 0: 315.2. Samples: 340932. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2023-06-21 11:02:06,550][62782] Avg episode reward: [(0, '23.069')]
+[2023-06-21 11:02:11,546][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1221.9). Total num frames: 3371008. Throughput: 0: 315.2. Samples: 341882. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0)
+[2023-06-21 11:02:11,547][62782] Avg episode reward: [(0, '22.050')]
+[2023-06-21 11:02:16,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1221.9). Total num frames: 3379200. Throughput: 0: 315.2. Samples: 343786. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:02:16,550][62782] Avg episode reward: [(0, '22.839')]
+[2023-06-21 11:02:21,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1221.9). Total num frames: 3383296. Throughput: 0: 315.0. Samples: 345678. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:02:21,550][62782] Avg episode reward: [(0, '22.486')]
+[2023-06-21 11:02:26,546][62782] Fps is (10 sec: 819.3, 60 sec: 1228.8, 300 sec: 1208.0). Total num frames: 3387392. Throughput: 0: 315.2. Samples: 346632. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:26,552][62782] Avg episode reward: [(0, '22.660')]
+[2023-06-21 11:02:31,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1221.9). Total num frames: 3395584. Throughput: 0: 313.3. Samples: 348458. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:02:31,548][62782] Avg episode reward: [(0, '21.298')]
+[2023-06-21 11:02:32,138][69878] Updated weights for policy 0, policy_version 830 (0.0011)
+[2023-06-21 11:02:36,546][62782] Fps is (10 sec: 1638.3, 60 sec: 1297.1, 300 sec: 1235.8). Total num frames: 3403776. Throughput: 0: 313.5. Samples: 350370. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:36,547][62782] Avg episode reward: [(0, '21.316')]
+[2023-06-21 11:02:41,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1221.9). Total num frames: 3407872. Throughput: 0: 313.7. Samples: 351328. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:41,550][62782] Avg episode reward: [(0, '21.418')]
+[2023-06-21 11:02:46,545][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1235.8). Total num frames: 3416064. Throughput: 0: 314.9. Samples: 353252. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:46,546][62782] Avg episode reward: [(0, '21.606')]
+[2023-06-21 11:02:51,546][62782] Fps is (10 sec: 1638.5, 60 sec: 1297.1, 300 sec: 1249.6). Total num frames: 3424256. Throughput: 0: 316.0. Samples: 355150. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:51,548][62782] Avg episode reward: [(0, '22.330')]
+[2023-06-21 11:02:56,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1229.0, 300 sec: 1235.8). Total num frames: 3428352. Throughput: 0: 316.2. Samples: 356112. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:02:56,547][62782] Avg episode reward: [(0, '22.256')]
+[2023-06-21 11:03:01,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1297.1, 300 sec: 1249.7). Total num frames: 3436544. Throughput: 0: 316.5. Samples: 358028. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:01,546][62782] Avg episode reward: [(0, '22.612')]
+[2023-06-21 11:03:01,549][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000839_3436544.pth...
+[2023-06-21 11:03:01,637][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000765_3133440.pth
+[2023-06-21 11:03:04,026][69878] Updated weights for policy 0, policy_version 840 (0.0007)
+[2023-06-21 11:03:06,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1235.8). Total num frames: 3440640. Throughput: 0: 317.0. Samples: 359944. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:06,547][62782] Avg episode reward: [(0, '22.676')]
+[2023-06-21 11:03:11,546][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1249.7). Total num frames: 3448832. Throughput: 0: 317.0. Samples: 360898. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:11,548][62782] Avg episode reward: [(0, '23.135')]
+[2023-06-21 11:03:16,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3452928. Throughput: 0: 319.1. Samples: 362818. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:16,548][62782] Avg episode reward: [(0, '23.234')]
+[2023-06-21 11:03:21,545][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1249.7). Total num frames: 3461120. Throughput: 0: 319.0. Samples: 364724. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:21,546][62782] Avg episode reward: [(0, '22.683')]
+[2023-06-21 11:03:26,545][62782] Fps is (10 sec: 1638.7, 60 sec: 1365.3, 300 sec: 1263.5). Total num frames: 3469312. Throughput: 0: 319.1. Samples: 365686. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:26,546][62782] Avg episode reward: [(0, '23.499')]
+[2023-06-21 11:03:31,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1249.6). Total num frames: 3473408. Throughput: 0: 319.0. Samples: 367606. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:03:31,549][62782] Avg episode reward: [(0, '23.500')]
+[2023-06-21 11:03:36,049][69878] Updated weights for policy 0, policy_version 850 (0.0007)
+[2023-06-21 11:03:36,543][62782] Fps is (10 sec: 1229.1, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3481600. Throughput: 0: 319.4. Samples: 369522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:03:36,544][62782] Avg episode reward: [(0, '23.393')]
+[2023-06-21 11:03:41,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3485696. Throughput: 0: 319.2. Samples: 370478. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:03:41,549][62782] Avg episode reward: [(0, '23.537')]
+[2023-06-21 11:03:46,546][62782] Fps is (10 sec: 1228.4, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 3493888. Throughput: 0: 319.2. Samples: 372392. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:03:46,548][62782] Avg episode reward: [(0, '23.846')]
+[2023-06-21 11:03:51,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3497984. Throughput: 0: 319.2. Samples: 374306. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:03:51,548][62782] Avg episode reward: [(0, '23.548')]
+[2023-06-21 11:03:56,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 3506176. Throughput: 0: 319.2. Samples: 375262. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:03:56,548][62782] Avg episode reward: [(0, '24.741')]
+[2023-06-21 11:04:01,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3510272. Throughput: 0: 317.7. Samples: 377114. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:04:01,550][62782] Avg episode reward: [(0, '25.102')]
+[2023-06-21 11:04:06,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3518464. Throughput: 0: 316.0. Samples: 378944. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:04:06,549][62782] Avg episode reward: [(0, '25.436')]
+[2023-06-21 11:04:08,927][69878] Updated weights for policy 0, policy_version 860 (0.0019)
+[2023-06-21 11:04:11,547][62782] Fps is (10 sec: 1228.5, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3522560. Throughput: 0: 315.8. Samples: 379898. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:04:11,551][62782] Avg episode reward: [(0, '24.798')]
+[2023-06-21 11:04:16,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3530752. Throughput: 0: 315.1. Samples: 381786. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:16,552][62782] Avg episode reward: [(0, '24.485')]
+[2023-06-21 11:04:21,550][62782] Fps is (10 sec: 1228.5, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3534848. Throughput: 0: 313.9. Samples: 383650. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:21,556][62782] Avg episode reward: [(0, '25.309')]
+[2023-06-21 11:04:26,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3543040. Throughput: 0: 313.4. Samples: 384582. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:04:26,552][62782] Avg episode reward: [(0, '24.992')]
+[2023-06-21 11:04:31,548][62782] Fps is (10 sec: 1638.7, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 3551232. Throughput: 0: 312.9. Samples: 386474. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:31,550][62782] Avg episode reward: [(0, '25.768')]
+[2023-06-21 11:04:36,545][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3555328. Throughput: 0: 312.5. Samples: 388366. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:36,547][62782] Avg episode reward: [(0, '25.919')]
+[2023-06-21 11:04:41,135][69878] Updated weights for policy 0, policy_version 870 (0.0018)
+[2023-06-21 11:04:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 3563520. Throughput: 0: 312.5. Samples: 389324. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:41,548][62782] Avg episode reward: [(0, '25.577')]
+[2023-06-21 11:04:46,546][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3567616. Throughput: 0: 313.4. Samples: 391216. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-06-21 11:04:46,550][62782] Avg episode reward: [(0, '25.228')]
+[2023-06-21 11:04:51,545][62782] Fps is (10 sec: 1229.1, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 3575808. Throughput: 0: 313.9. Samples: 393070. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
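
The "Saving .../checkpoint_*.pth" / "Removing ..." pairs scattered through this run implement a keep-the-last-N checkpoint rotation (keep_checkpoints=2 in this run's config): each new save evicts the oldest file. The filenames encode the policy version and the total env-frame count, and in this run frames = policy_version * 4096 (e.g. 666 * 4096 = 2727936), consistent with batch_size=1024 and env_frameskip=4. A sketch of the same rotation idea, as an illustration rather than Sample Factory's own implementation:

    from pathlib import Path

    def rotate_checkpoints(ckpt_dir, keep=2):
        """Keep only the `keep` newest checkpoint_*.pth files in ckpt_dir."""
        # Lexicographic sort works here because the policy version is
        # zero-padded in the filename (checkpoint_000000666_2727936.pth).
        ckpts = sorted(Path(ckpt_dir).glob("checkpoint_*.pth"))
        for old in ckpts[:-keep]:
            old.unlink()
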
+[2023-06-21 11:04:51,548][62782] Avg episode reward: [(0, '25.509')]
+[2023-06-21 11:04:56,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3579904. Throughput: 0: 313.7. Samples: 394012. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:04:56,549][62782] Avg episode reward: [(0, '26.024')]
+[2023-06-21 11:05:01,546][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 3588096. Throughput: 0: 313.2. Samples: 395882. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:01,549][62782] Avg episode reward: [(0, '26.299')]
+[2023-06-21 11:05:01,554][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000876_3588096.pth...
+[2023-06-21 11:05:01,660][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000802_3284992.pth
+[2023-06-21 11:05:06,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3592192. Throughput: 0: 313.9. Samples: 397776. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:06,550][62782] Avg episode reward: [(0, '26.177')]
+[2023-06-21 11:05:11,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3600384. Throughput: 0: 314.2. Samples: 398722. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:11,550][62782] Avg episode reward: [(0, '25.293')]
+[2023-06-21 11:05:14,135][69878] Updated weights for policy 0, policy_version 880 (0.0018)
+[2023-06-21 11:05:16,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3604480. Throughput: 0: 314.2. Samples: 400614. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:16,551][62782] Avg episode reward: [(0, '25.162')]
+[2023-06-21 11:05:21,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3612672. Throughput: 0: 314.2. Samples: 402506. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:21,548][62782] Avg episode reward: [(0, '25.204')]
+[2023-06-21 11:05:26,542][62782] Fps is (10 sec: 1229.4, 60 sec: 1228.9, 300 sec: 1263.5). Total num frames: 3616768. Throughput: 0: 313.9. Samples: 403448. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:26,546][62782] Avg episode reward: [(0, '24.798')]
+[2023-06-21 11:05:31,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3624960. Throughput: 0: 313.8. Samples: 405338. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:31,550][62782] Avg episode reward: [(0, '24.380')]
+[2023-06-21 11:05:36,547][62782] Fps is (10 sec: 1228.3, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3629056. Throughput: 0: 315.0. Samples: 407244. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:36,550][62782] Avg episode reward: [(0, '23.755')]
+[2023-06-21 11:05:41,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3637248. Throughput: 0: 315.0. Samples: 408188. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:41,550][62782] Avg episode reward: [(0, '23.390')]
+[2023-06-21 11:05:46,350][69878] Updated weights for policy 0, policy_version 890 (0.0013)
+[2023-06-21 11:05:46,547][62782] Fps is (10 sec: 1638.3, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 3645440. Throughput: 0: 315.6. Samples: 410086. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:46,550][62782] Avg episode reward: [(0, '23.469')]
+[2023-06-21 11:05:51,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3649536. Throughput: 0: 315.7. Samples: 411984. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:51,548][62782] Avg episode reward: [(0, '23.962')]
+[2023-06-21 11:05:56,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 3657728. Throughput: 0: 315.8. Samples: 412932. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:05:56,549][62782] Avg episode reward: [(0, '23.272')]
+[2023-06-21 11:06:01,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3661824. Throughput: 0: 315.7. Samples: 414822. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:01,551][62782] Avg episode reward: [(0, '23.466')]
+[2023-06-21 11:06:06,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3670016. Throughput: 0: 315.8. Samples: 416718. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:06,549][62782] Avg episode reward: [(0, '23.595')]
+[2023-06-21 11:06:11,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3674112. Throughput: 0: 315.9. Samples: 417664. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:11,550][62782] Avg episode reward: [(0, '23.447')]
+[2023-06-21 11:06:16,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3682304. Throughput: 0: 316.0. Samples: 419560. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:16,549][62782] Avg episode reward: [(0, '24.782')]
+[2023-06-21 11:06:18,870][69878] Updated weights for policy 0, policy_version 900 (0.0014)
+[2023-06-21 11:06:21,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3686400. Throughput: 0: 315.9. Samples: 421458. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:21,552][62782] Avg episode reward: [(0, '25.187')]
+[2023-06-21 11:06:26,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 3694592. Throughput: 0: 315.9. Samples: 422404. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:26,550][62782] Avg episode reward: [(0, '23.964')]
+[2023-06-21 11:06:31,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3698688. Throughput: 0: 315.6. Samples: 424286. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:31,551][62782] Avg episode reward: [(0, '23.800')]
+[2023-06-21 11:06:36,546][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3706880. Throughput: 0: 315.1. Samples: 426162. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:36,549][62782] Avg episode reward: [(0, '24.069')]
+[2023-06-21 11:06:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3710976. Throughput: 0: 315.1. Samples: 427110. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:41,550][62782] Avg episode reward: [(0, '24.788')]
+[2023-06-21 11:06:46,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3719168. Throughput: 0: 315.1. Samples: 429002. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:46,551][62782] Avg episode reward: [(0, '25.008')]
+[2023-06-21 11:06:51,493][69878] Updated weights for policy 0, policy_version 910 (0.0012)
+[2023-06-21 11:06:51,544][62782] Fps is (10 sec: 1638.9, 60 sec: 1297.1, 300 sec: 1263.6). Total num frames: 3727360. Throughput: 0: 315.1. Samples: 430896. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:51,546][62782] Avg episode reward: [(0, '25.131')]
+[2023-06-21 11:06:56,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3731456. Throughput: 0: 315.2. Samples: 431846. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:06:56,548][62782] Avg episode reward: [(0, '25.242')]
+[2023-06-21 11:07:01,545][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3739648. Throughput: 0: 315.1. Samples: 433738. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:01,547][62782] Avg episode reward: [(0, '25.968')]
+[2023-06-21 11:07:01,550][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000913_3739648.pth...
+[2023-06-21 11:07:01,662][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000839_3436544.pth
+[2023-06-21 11:07:06,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3743744. Throughput: 0: 314.8. Samples: 435626. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:06,550][62782] Avg episode reward: [(0, '25.110')]
+[2023-06-21 11:07:11,545][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3751936. Throughput: 0: 315.0. Samples: 436578. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:11,547][62782] Avg episode reward: [(0, '24.866')]
+[2023-06-21 11:07:16,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3756032. Throughput: 0: 315.3. Samples: 438476. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:16,550][62782] Avg episode reward: [(0, '25.067')]
+[2023-06-21 11:07:21,545][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 3764224. Throughput: 0: 315.7. Samples: 440370. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:21,547][62782] Avg episode reward: [(0, '24.682')]
+[2023-06-21 11:07:23,952][69878] Updated weights for policy 0, policy_version 920 (0.0016)
+[2023-06-21 11:07:26,550][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3768320. Throughput: 0: 315.6. Samples: 441312. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:26,569][62782] Avg episode reward: [(0, '23.626')]
+[2023-06-21 11:07:31,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3776512. Throughput: 0: 315.5. Samples: 443198. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:31,550][62782] Avg episode reward: [(0, '23.502')]
+[2023-06-21 11:07:36,550][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3780608. Throughput: 0: 315.7. Samples: 445102. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:07:36,564][62782] Avg episode reward: [(0, '22.587')]
+[2023-06-21 11:07:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3788800. Throughput: 0: 315.5. Samples: 446042. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:41,549][62782] Avg episode reward: [(0, '22.696')]
+[2023-06-21 11:07:46,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3792896. Throughput: 0: 315.6. Samples: 447940. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:46,550][62782] Avg episode reward: [(0, '22.409')]
+[2023-06-21 11:07:51,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3801088. Throughput: 0: 315.6. Samples: 449828. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:07:51,551][62782] Avg episode reward: [(0, '22.894')]
+[2023-06-21 11:07:56,385][69878] Updated weights for policy 0, policy_version 930 (0.0019)
+[2023-06-21 11:07:56,547][62782] Fps is (10 sec: 1638.4, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 3809280. Throughput: 0: 315.5. Samples: 450778. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:07:56,549][62782] Avg episode reward: [(0, '23.598')]
+[2023-06-21 11:08:01,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3813376. Throughput: 0: 315.6. Samples: 452678. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:01,550][62782] Avg episode reward: [(0, '23.715')]
+[2023-06-21 11:08:06,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3821568. Throughput: 0: 315.6. Samples: 454572. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:06,549][62782] Avg episode reward: [(0, '23.309')]
+[2023-06-21 11:08:11,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3825664. Throughput: 0: 315.7. Samples: 455518. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:11,551][62782] Avg episode reward: [(0, '22.948')]
+[2023-06-21 11:08:16,545][62782] Fps is (10 sec: 1229.1, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3833856. Throughput: 0: 315.9. Samples: 457414. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:16,547][62782] Avg episode reward: [(0, '22.730')]
+[2023-06-21 11:08:21,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3837952. Throughput: 0: 315.7. Samples: 459310. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:21,550][62782] Avg episode reward: [(0, '22.709')]
+[2023-06-21 11:08:26,546][62782] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3846144. Throughput: 0: 315.7. Samples: 460248. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:08:26,549][62782] Avg episode reward: [(0, '22.977')]
+[2023-06-21 11:08:28,913][69878] Updated weights for policy 0, policy_version 940 (0.0015)
+[2023-06-21 11:08:31,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3850240. Throughput: 0: 315.7. Samples: 462148. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:08:31,549][62782] Avg episode reward: [(0, '23.765')]
+[2023-06-21 11:08:36,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3858432. Throughput: 0: 316.1. Samples: 464052. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:36,550][62782] Avg episode reward: [(0, '23.574')]
+[2023-06-21 11:08:41,545][62782] Fps is (10 sec: 1229.1, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3862528. Throughput: 0: 315.9. Samples: 464994. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:41,548][62782] Avg episode reward: [(0, '23.452')]
+[2023-06-21 11:08:46,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3870720. Throughput: 0: 315.5. Samples: 466876. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:46,550][62782] Avg episode reward: [(0, '23.792')]
+[2023-06-21 11:08:51,547][62782] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3874816. Throughput: 0: 315.3. Samples: 468762. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:08:51,550][62782] Avg episode reward: [(0, '25.539')]
+[2023-06-21 11:08:56,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3883008. Throughput: 0: 315.5. Samples: 469716. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:08:56,550][62782] Avg episode reward: [(0, '25.785')]
+[2023-06-21 11:09:01,429][69878] Updated weights for policy 0, policy_version 950 (0.0018)
+[2023-06-21 11:09:01,547][62782] Fps is (10 sec: 1638.4, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3891200. Throughput: 0: 315.5. Samples: 471614. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:01,548][62782] Avg episode reward: [(0, '26.302')]
+[2023-06-21 11:09:01,553][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000950_3891200.pth...
+[2023-06-21 11:09:01,654][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000876_3588096.pth
+[2023-06-21 11:09:06,546][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 3895296. Throughput: 0: 315.2. Samples: 473496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:06,550][62782] Avg episode reward: [(0, '26.181')]
+[2023-06-21 11:09:11,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3903488. Throughput: 0: 315.2. Samples: 474434. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:11,549][62782] Avg episode reward: [(0, '26.355')]
+[2023-06-21 11:09:16,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3907584. Throughput: 0: 315.0. Samples: 476324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:16,550][62782] Avg episode reward: [(0, '26.452')]
+[2023-06-21 11:09:21,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3915776. Throughput: 0: 314.6. Samples: 478210. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:09:21,550][62782] Avg episode reward: [(0, '26.087')]
+[2023-06-21 11:09:26,548][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3919872. Throughput: 0: 314.8. Samples: 479160. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:09:26,564][62782] Avg episode reward: [(0, '26.313')]
+[2023-06-21 11:09:31,547][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3928064. Throughput: 0: 315.2. Samples: 481062. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:31,550][62782] Avg episode reward: [(0, '26.155')]
+[2023-06-21 11:09:33,951][69878] Updated weights for policy 0, policy_version 960 (0.0020)
+[2023-06-21 11:09:36,548][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3932160. Throughput: 0: 315.4. Samples: 482954. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:36,551][62782] Avg episode reward: [(0, '26.243')]
+[2023-06-21 11:09:41,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 3940352. Throughput: 0: 315.2. Samples: 483900. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:41,551][62782] Avg episode reward: [(0, '26.313')]
+[2023-06-21 11:09:46,546][62782] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3944448. Throughput: 0: 315.1. Samples: 485794. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:09:46,549][62782] Avg episode reward: [(0, '26.556')]
+[2023-06-21 11:09:51,547][62782] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 3952640. Throughput: 0: 315.5. Samples: 487694. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:51,556][62782] Avg episode reward: [(0, '27.144')]
+[2023-06-21 11:09:53,394][69876] Saving new best policy, reward=27.144!
+[2023-06-21 11:09:56,547][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 3956736. Throughput: 0: 315.6. Samples: 488638. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:09:56,550][62782] Avg episode reward: [(0, '27.272')]
+[2023-06-21 11:09:56,664][69876] Saving new best policy, reward=27.272!
+[2023-06-21 11:10:01,551][62782] Fps is (10 sec: 1228.3, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3964928. Throughput: 0: 313.2. Samples: 490418. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:10:01,555][62782] Avg episode reward: [(0, '26.251')]
+[2023-06-21 11:10:06,550][62782] Fps is (10 sec: 1228.5, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 3969024. Throughput: 0: 309.0. Samples: 492116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2023-06-21 11:10:06,554][62782] Avg episode reward: [(0, '26.864')]
+[2023-06-21 11:10:07,274][69878] Updated weights for policy 0, policy_version 970 (0.0022)
+[2023-06-21 11:10:11,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1263.5). Total num frames: 3977216. Throughput: 0: 306.5. Samples: 492956. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:10:11,553][62782] Avg episode reward: [(0, '26.681')]
+[2023-06-21 11:10:16,551][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 3981312. Throughput: 0: 301.7. Samples: 494640. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:10:16,556][62782] Avg episode reward: [(0, '26.049')]
+[2023-06-21 11:10:21,552][62782] Fps is (10 sec: 819.1, 60 sec: 1160.4, 300 sec: 1249.6). Total num frames: 3985408. Throughput: 0: 297.3. Samples: 496332. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:10:21,556][62782] Avg episode reward: [(0, '26.103')]
+[2023-06-21 11:10:26,551][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 3993600. Throughput: 0: 294.9. Samples: 497172. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:10:26,553][62782] Avg episode reward: [(0, '25.261')]
+[2023-06-21 11:10:31,552][62782] Fps is (10 sec: 1228.8, 60 sec: 1160.4, 300 sec: 1249.6). Total num frames: 3997696. Throughput: 0: 290.1. Samples: 498850. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2023-06-21 11:10:31,558][62782] Avg episode reward: [(0, '25.540')]
+[2023-06-21 11:10:36,268][69876] Stopping Batcher_0...
+[2023-06-21 11:10:36,276][69876] Loop batcher_evt_loop terminating...
+[2023-06-21 11:10:36,280][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-06-21 11:10:36,274][62782] Component Batcher_0 stopped!
+[2023-06-21 11:10:36,372][69876] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000913_3739648.pth
+[2023-06-21 11:10:36,412][69876] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-06-21 11:10:36,504][69876] Stopping LearnerWorker_p0...
+[2023-06-21 11:10:36,504][69876] Loop learner_proc0_evt_loop terminating...
+[2023-06-21 11:10:36,504][62782] Component LearnerWorker_p0 stopped!
+[2023-06-21 11:10:36,799][62782] Component RolloutWorker_w3 stopped!
+[2023-06-21 11:10:36,803][62782] Component RolloutWorker_w6 stopped!
+[2023-06-21 11:10:36,805][62782] Component RolloutWorker_w2 stopped!
+[2023-06-21 11:10:36,806][62782] Component RolloutWorker_w4 stopped!
+[2023-06-21 11:10:36,806][62782] Component RolloutWorker_w5 stopped!
+[2023-06-21 11:10:36,805][69884] Stopping RolloutWorker_w6...
+[2023-06-21 11:10:36,807][62782] Component RolloutWorker_w1 stopped!
+[2023-06-21 11:10:36,813][69884] Loop rollout_proc6_evt_loop terminating...
+[2023-06-21 11:10:36,805][69881] Stopping RolloutWorker_w3...
+[2023-06-21 11:10:36,812][69882] Stopping RolloutWorker_w4...
+[2023-06-21 11:10:36,817][69881] Loop rollout_proc3_evt_loop terminating...
+[2023-06-21 11:10:36,815][69883] Stopping RolloutWorker_w5...
+[2023-06-21 11:10:36,822][69882] Loop rollout_proc4_evt_loop terminating...
+[2023-06-21 11:10:36,817][69880] Stopping RolloutWorker_w2...
+[2023-06-21 11:10:36,823][69883] Loop rollout_proc5_evt_loop terminating...
+[2023-06-21 11:10:36,823][69885] Stopping RolloutWorker_w7...
+[2023-06-21 11:10:36,827][69880] Loop rollout_proc2_evt_loop terminating...
+[2023-06-21 11:10:36,818][62782] Component RolloutWorker_w7 stopped!
+[2023-06-21 11:10:36,829][69885] Loop rollout_proc7_evt_loop terminating...
+[2023-06-21 11:10:36,828][62782] Component RolloutWorker_w0 stopped!
+[2023-06-21 11:10:36,828][69877] Stopping RolloutWorker_w0...
+[2023-06-21 11:10:36,823][69879] Stopping RolloutWorker_w1...
+[2023-06-21 11:10:36,832][69879] Loop rollout_proc1_evt_loop terminating...
+[2023-06-21 11:10:36,833][69877] Loop rollout_proc0_evt_loop terminating...
+[2023-06-21 11:10:37,037][69878] Weights refcount: 2 0
+[2023-06-21 11:10:37,043][69878] Stopping InferenceWorker_p0-w0...
+[2023-06-21 11:10:37,045][69878] Loop inference_proc0-0_evt_loop terminating...
+[2023-06-21 11:10:37,044][62782] Component InferenceWorker_p0-w0 stopped!
+[2023-06-21 11:10:37,049][62782] Waiting for process learner_proc0 to stop...
+[2023-06-21 11:10:37,863][62782] Waiting for process inference_proc0-0 to join...
+[2023-06-21 11:10:37,866][62782] Waiting for process rollout_proc0 to join...
+[2023-06-21 11:10:37,866][62782] Waiting for process rollout_proc1 to join...
+[2023-06-21 11:10:37,867][62782] Waiting for process rollout_proc2 to join...
+[2023-06-21 11:10:37,867][62782] Waiting for process rollout_proc3 to join...
+[2023-06-21 11:10:37,867][62782] Waiting for process rollout_proc4 to join...
+[2023-06-21 11:10:37,868][62782] Waiting for process rollout_proc5 to join...
+[2023-06-21 11:10:37,872][62782] Waiting for process rollout_proc6 to join...
+[2023-06-21 11:10:37,872][62782] Waiting for process rollout_proc7 to join...
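The Saving/Removing pairs above (checkpoint_000000978 saved, checkpoint_000000913 removed) reflect checkpoint rotation: only the newest few checkpoints are kept, per keep_checkpoints=2 in this experiment's configuration. A minimal sketch of that rotation, assuming the checkpoint_<version>_<env_steps>.pth naming visible in the log; the helper below is illustrative, not Sample Factory's actual API:

from pathlib import Path

def rotate_checkpoints(checkpoint_dir: str, keep_checkpoints: int = 2) -> None:
    # Zero-padded policy versions make lexicographic order match recency order.
    ckpts = sorted(Path(checkpoint_dir).glob("checkpoint_*.pth"))
    for stale in ckpts[:-keep_checkpoints]:
        stale.unlink()  # e.g. drops checkpoint_000000913_3739648.pth once 000000978 lands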
+[2023-06-21 11:10:37,873][62782] Batcher 0 profile tree view:
+batching: 6.4846, releasing_batches: 0.0073
+[2023-06-21 11:10:37,874][62782] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0000
+  wait_policy_total: 1040.7824
+update_model: 3.6621
+  weight_update: 0.0037
+one_step: 0.0059
+  handle_policy_step: 591.4375
+    deserialize: 5.7777, stack: 1.0636, obs_to_device_normalize: 41.5510, forward: 517.7502, send_messages: 6.4697
+    prepare_outputs: 7.6375
+      to_cpu: 0.8247
+[2023-06-21 11:10:37,874][62782] Learner 0 profile tree view:
+misc: 0.0015, prepare_batch: 240.7255
+train: 843.3665
+  epoch_init: 0.0017, minibatch_init: 0.0076, losses_postprocess: 0.0174, kl_divergence: 0.0864, after_optimizer: 0.5407
+  calculate_losses: 479.9649
+    losses_init: 0.0010, forward_head: 462.8699, bptt_initial: 0.8487, tail: 0.8752, advantages_returns: 0.0856, losses: 0.3270
+    bptt: 14.7727
+      bptt_forward_core: 14.4951
+  update: 362.3651
+    clip: 0.8043
+[2023-06-21 11:10:37,875][62782] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.0607, enqueue_policy_requests: 4.2417, env_step: 1623.2834, overhead: 4.3230, complete_rollouts: 0.0809
+save_policy_outputs: 2.0853
+  split_output_tensors: 1.0207
+[2023-06-21 11:10:37,875][62782] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.0567, enqueue_policy_requests: 4.4229, env_step: 1622.7306, overhead: 4.2237, complete_rollouts: 0.0818
+save_policy_outputs: 2.0102
+  split_output_tensors: 0.9789
+[2023-06-21 11:10:37,876][62782] Loop Runner_EvtLoop terminating...
+[2023-06-21 11:10:37,877][62782] Runner profile tree view:
+main_loop: 1654.8298
+[2023-06-21 11:10:37,877][62782] Collected {0: 4005888}, FPS: 1207.9
+[2023-06-21 11:16:21,383][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json
+[2023-06-21 11:16:21,386][62782] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-06-21 11:16:21,387][62782] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-06-21 11:16:21,388][62782] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-06-21 11:16:21,389][62782] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-06-21 11:16:21,390][62782] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-06-21 11:16:21,391][62782] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2023-06-21 11:16:21,391][62782] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-06-21 11:16:21,392][62782] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2023-06-21 11:16:21,393][62782] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2023-06-21 11:16:21,394][62782] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-06-21 11:16:21,395][62782] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-06-21 11:16:21,395][62782] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-06-21 11:16:21,396][62782] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-06-21 11:16:21,397][62782] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-06-21 11:16:21,415][62782] RunningMeanStd input shape: (3, 72, 128)
+[2023-06-21 11:16:21,419][62782] RunningMeanStd input shape: (1,)
+[2023-06-21 11:16:21,451][62782] ConvEncoder: input_channels=3
+[2023-06-21 11:16:21,480][62782] Conv encoder output size: 512
+[2023-06-21 11:16:21,480][62782] Policy head output size: 512
+[2023-06-21 11:16:21,492][62782] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-06-21 11:16:23,138][62782] Num frames 100...
+[2023-06-21 11:16:23,858][62782] Num frames 200...
+[2023-06-21 11:16:24,581][62782] Num frames 300...
+[2023-06-21 11:16:25,374][62782] Num frames 400...
+[2023-06-21 11:16:26,147][62782] Num frames 500...
+[2023-06-21 11:16:26,855][62782] Num frames 600...
+[2023-06-21 11:16:27,557][62782] Num frames 700...
+[2023-06-21 11:16:28,344][62782] Num frames 800...
+[2023-06-21 11:16:29,079][62782] Num frames 900...
+[2023-06-21 11:16:29,865][62782] Avg episode rewards: #0: 18.920, true rewards: #0: 9.920
+[2023-06-21 11:16:29,867][62782] Avg episode reward: 18.920, avg true_objective: 9.920
+[2023-06-21 11:16:29,926][62782] Num frames 1000...
+[2023-06-21 11:16:30,751][62782] Num frames 1100...
+[2023-06-21 11:16:31,510][62782] Num frames 1200...
+[2023-06-21 11:16:32,203][62782] Num frames 1300...
+[2023-06-21 11:16:32,965][62782] Num frames 1400...
+[2023-06-21 11:16:33,763][62782] Num frames 1500...
+[2023-06-21 11:16:34,573][62782] Num frames 1600...
+[2023-06-21 11:16:35,275][62782] Num frames 1700...
+[2023-06-21 11:16:36,115][62782] Num frames 1800...
+[2023-06-21 11:16:36,874][62782] Num frames 1900...
+[2023-06-21 11:16:37,734][62782] Num frames 2000...
+[2023-06-21 11:16:38,601][62782] Num frames 2100...
+[2023-06-21 11:16:39,493][62782] Num frames 2200...
+[2023-06-21 11:16:40,265][62782] Num frames 2300...
+[2023-06-21 11:16:40,901][62782] Avg episode rewards: #0: 24.340, true rewards: #0: 11.840
+[2023-06-21 11:16:40,902][62782] Avg episode reward: 24.340, avg true_objective: 11.840
+[2023-06-21 11:16:41,131][62782] Num frames 2400...
+[2023-06-21 11:16:41,884][62782] Num frames 2500...
+[2023-06-21 11:16:42,662][62782] Num frames 2600...
+[2023-06-21 11:16:43,439][62782] Num frames 2700...
+[2023-06-21 11:16:44,197][62782] Num frames 2800...
+[2023-06-21 11:16:44,924][62782] Num frames 2900...
+[2023-06-21 11:16:45,328][62782] Avg episode rewards: #0: 19.147, true rewards: #0: 9.813
+[2023-06-21 11:16:45,330][62782] Avg episode reward: 19.147, avg true_objective: 9.813
+[2023-06-21 11:16:45,765][62782] Num frames 3000...
+[2023-06-21 11:16:46,676][62782] Num frames 3100...
+[2023-06-21 11:16:47,565][62782] Num frames 3200...
+[2023-06-21 11:16:48,404][62782] Num frames 3300...
+[2023-06-21 11:16:49,223][62782] Num frames 3400...
+[2023-06-21 11:16:49,974][62782] Num frames 3500...
+[2023-06-21 11:16:50,730][62782] Num frames 3600...
+[2023-06-21 11:16:51,466][62782] Num frames 3700...
+[2023-06-21 11:16:52,209][62782] Num frames 3800...
+[2023-06-21 11:16:52,991][62782] Num frames 3900...
+[2023-06-21 11:16:53,767][62782] Num frames 4000...
+[2023-06-21 11:16:54,514][62782] Num frames 4100...
+[2023-06-21 11:16:55,229][62782] Num frames 4200...
+[2023-06-21 11:16:55,982][62782] Num frames 4300...
+[2023-06-21 11:16:56,730][62782] Num frames 4400...
+[2023-06-21 11:16:57,481][62782] Num frames 4500...
+[2023-06-21 11:16:58,347][62782] Num frames 4600...
+[2023-06-21 11:16:58,979][62782] Avg episode rewards: #0: 24.900, true rewards: #0: 11.650
+[2023-06-21 11:16:58,981][62782] Avg episode reward: 24.900, avg true_objective: 11.650
+[2023-06-21 11:16:59,339][62782] Num frames 4700...
+[2023-06-21 11:17:00,240][62782] Num frames 4800...
+[2023-06-21 11:17:01,079][62782] Num frames 4900...
+[2023-06-21 11:17:01,975][62782] Num frames 5000...
+[2023-06-21 11:17:02,901][62782] Num frames 5100...
+[2023-06-21 11:17:03,697][62782] Num frames 5200...
+[2023-06-21 11:17:04,462][62782] Num frames 5300...
+[2023-06-21 11:17:05,336][62782] Num frames 5400...
+[2023-06-21 11:17:06,193][62782] Num frames 5500...
+[2023-06-21 11:17:07,090][62782] Num frames 5600...
+[2023-06-21 11:17:07,969][62782] Num frames 5700...
+[2023-06-21 11:17:08,827][62782] Num frames 5800...
+[2023-06-21 11:17:09,737][62782] Num frames 5900...
+[2023-06-21 11:17:10,594][62782] Num frames 6000...
+[2023-06-21 11:17:11,469][62782] Num frames 6100...
+[2023-06-21 11:17:12,328][62782] Num frames 6200...
+[2023-06-21 11:17:13,224][62782] Num frames 6300...
+[2023-06-21 11:17:13,825][62782] Avg episode rewards: #0: 28.912, true rewards: #0: 12.712
+[2023-06-21 11:17:13,826][62782] Avg episode reward: 28.912, avg true_objective: 12.712
+[2023-06-21 11:17:14,215][62782] Num frames 6400...
+[2023-06-21 11:17:15,081][62782] Num frames 6500...
+[2023-06-21 11:17:15,949][62782] Num frames 6600...
+[2023-06-21 11:17:16,786][62782] Num frames 6700...
+[2023-06-21 11:17:17,649][62782] Num frames 6800...
+[2023-06-21 11:17:18,524][62782] Num frames 6900...
+[2023-06-21 11:17:19,414][62782] Num frames 7000...
+[2023-06-21 11:17:20,294][62782] Num frames 7100...
+[2023-06-21 11:17:21,163][62782] Num frames 7200...
+[2023-06-21 11:17:22,050][62782] Num frames 7300...
+[2023-06-21 11:17:22,825][62782] Num frames 7400...
+[2023-06-21 11:17:23,291][62782] Avg episode rewards: #0: 28.407, true rewards: #0: 12.407
+[2023-06-21 11:17:23,294][62782] Avg episode reward: 28.407, avg true_objective: 12.407
+[2023-06-21 11:17:23,778][62782] Num frames 7500...
+[2023-06-21 11:17:24,622][62782] Num frames 7600...
+[2023-06-21 11:17:25,563][62782] Num frames 7700...
+[2023-06-21 11:17:26,412][62782] Num frames 7800...
+[2023-06-21 11:17:27,300][62782] Num frames 7900...
+[2023-06-21 11:17:28,218][62782] Num frames 8000...
+[2023-06-21 11:17:28,491][62782] Avg episode rewards: #0: 25.743, true rewards: #0: 11.457
+[2023-06-21 11:17:28,493][62782] Avg episode reward: 25.743, avg true_objective: 11.457
+[2023-06-21 11:17:29,162][62782] Num frames 8100...
+[2023-06-21 11:17:30,053][62782] Num frames 8200...
+[2023-06-21 11:17:30,929][62782] Num frames 8300...
+[2023-06-21 11:17:31,823][62782] Num frames 8400...
+[2023-06-21 11:17:32,668][62782] Num frames 8500...
+[2023-06-21 11:17:33,522][62782] Num frames 8600...
+[2023-06-21 11:17:34,391][62782] Num frames 8700...
+[2023-06-21 11:17:35,307][62782] Num frames 8800...
+[2023-06-21 11:17:36,172][62782] Num frames 8900...
+[2023-06-21 11:17:37,060][62782] Num frames 9000...
+[2023-06-21 11:17:37,904][62782] Num frames 9100...
+[2023-06-21 11:17:38,724][62782] Num frames 9200...
+[2023-06-21 11:17:39,605][62782] Num frames 9300...
+[2023-06-21 11:17:40,491][62782] Num frames 9400...
+[2023-06-21 11:17:41,440][62782] Num frames 9500...
+[2023-06-21 11:17:42,310][62782] Num frames 9600...
+[2023-06-21 11:17:43,191][62782] Num frames 9700...
+[2023-06-21 11:17:44,114][62782] Num frames 9800...
+[2023-06-21 11:17:44,945][62782] Num frames 9900...
+[2023-06-21 11:17:45,799][62782] Num frames 10000...
+[2023-06-21 11:17:46,711][62782] Num frames 10100...
+[2023-06-21 11:17:46,990][62782] Avg episode rewards: #0: 29.650, true rewards: #0: 12.650
+[2023-06-21 11:17:46,992][62782] Avg episode reward: 29.650, avg true_objective: 12.650
+[2023-06-21 11:17:47,741][62782] Num frames 10200...
+[2023-06-21 11:17:48,549][62782] Num frames 10300...
+[2023-06-21 11:17:49,448][62782] Num frames 10400...
+[2023-06-21 11:17:50,380][62782] Num frames 10500...
+[2023-06-21 11:17:51,278][62782] Num frames 10600...
+[2023-06-21 11:17:52,210][62782] Num frames 10700...
+[2023-06-21 11:17:53,150][62782] Num frames 10800...
+[2023-06-21 11:17:54,110][62782] Num frames 10900...
+[2023-06-21 11:17:55,057][62782] Num frames 11000...
+[2023-06-21 11:17:55,972][62782] Num frames 11100...
+[2023-06-21 11:17:56,884][62782] Num frames 11200...
+[2023-06-21 11:17:57,846][62782] Num frames 11300...
+[2023-06-21 11:17:58,805][62782] Num frames 11400...
+[2023-06-21 11:17:59,510][62782] Avg episode rewards: #0: 30.404, true rewards: #0: 12.738
+[2023-06-21 11:17:59,513][62782] Avg episode reward: 30.404, avg true_objective: 12.738
+[2023-06-21 11:17:59,834][62782] Num frames 11500...
+[2023-06-21 11:18:00,779][62782] Num frames 11600...
+[2023-06-21 11:18:01,729][62782] Num frames 11700...
+[2023-06-21 11:18:02,663][62782] Num frames 11800...
+[2023-06-21 11:18:03,603][62782] Num frames 11900...
+[2023-06-21 11:18:04,573][62782] Num frames 12000...
+[2023-06-21 11:18:05,559][62782] Num frames 12100...
+[2023-06-21 11:18:06,517][62782] Num frames 12200...
+[2023-06-21 11:18:07,439][62782] Num frames 12300...
+[2023-06-21 11:18:07,802][62782] Avg episode rewards: #0: 29.228, true rewards: #0: 12.328
+[2023-06-21 11:18:07,804][62782] Avg episode reward: 29.228, avg true_objective: 12.328
+[2023-06-21 11:18:24,353][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4!
+[2023-06-21 11:20:01,788][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json
+[2023-06-21 11:20:01,790][62782] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-06-21 11:20:01,790][62782] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-06-21 11:20:01,790][62782] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-06-21 11:20:01,791][62782] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-06-21 11:20:01,791][62782] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-06-21 11:20:01,791][62782] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2023-06-21 11:20:01,792][62782] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-06-21 11:20:01,792][62782] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2023-06-21 11:20:01,792][62782] Adding new argument 'hf_repository'='mihirdeo16/vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2023-06-21 11:20:01,792][62782] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-06-21 11:20:01,793][62782] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-06-21 11:20:01,793][62782] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-06-21 11:20:01,793][62782] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-06-21 11:20:01,794][62782] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-06-21 11:20:01,801][62782] RunningMeanStd input shape: (3, 72, 128)
+[2023-06-21 11:20:01,804][62782] RunningMeanStd input shape: (1,)
+[2023-06-21 11:20:01,820][62782] ConvEncoder: input_channels=3
+[2023-06-21 11:20:01,845][62782] Conv encoder output size: 512
+[2023-06-21 11:20:01,845][62782] Policy head output size: 512
+[2023-06-21 11:20:01,864][62782] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-06-21 11:20:04,020][62782] Num frames 100...
+[2023-06-21 11:20:04,746][62782] Num frames 200...
+[2023-06-21 11:20:05,556][62782] Num frames 300...
+[2023-06-21 11:20:06,401][62782] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2023-06-21 11:20:06,403][62782] Avg episode reward: 3.840, avg true_objective: 3.840
+[2023-06-21 11:20:06,544][62782] Num frames 400...
+[2023-06-21 11:20:07,449][62782] Num frames 500...
+[2023-06-21 11:20:08,351][62782] Num frames 600...
+[2023-06-21 11:20:09,244][62782] Num frames 700...
+[2023-06-21 11:20:10,161][62782] Num frames 800...
+[2023-06-21 11:20:11,082][62782] Num frames 900...
+[2023-06-21 11:20:11,912][62782] Num frames 1000...
+[2023-06-21 11:20:12,681][62782] Num frames 1100...
+[2023-06-21 11:20:13,505][62782] Num frames 1200...
+[2023-06-21 11:20:14,384][62782] Num frames 1300...
+[2023-06-21 11:20:15,260][62782] Num frames 1400...
+[2023-06-21 11:20:16,187][62782] Num frames 1500...
+[2023-06-21 11:20:17,107][62782] Num frames 1600...
+[2023-06-21 11:20:17,998][62782] Num frames 1700...
+[2023-06-21 11:20:18,874][62782] Num frames 1800...
+[2023-06-21 11:20:19,781][62782] Num frames 1900...
+[2023-06-21 11:20:20,686][62782] Num frames 2000...
+[2023-06-21 11:20:21,590][62782] Num frames 2100...
+[2023-06-21 11:20:22,482][62782] Num frames 2200...
+[2023-06-21 11:20:22,966][62782] Avg episode rewards: #0: 23.200, true rewards: #0: 11.200
+[2023-06-21 11:20:22,967][62782] Avg episode reward: 23.200, avg true_objective: 11.200
+[2023-06-21 11:20:23,520][62782] Num frames 2300...
+[2023-06-21 11:20:24,411][62782] Num frames 2400...
+[2023-06-21 11:20:25,313][62782] Num frames 2500...
+[2023-06-21 11:20:26,198][62782] Num frames 2600...
+[2023-06-21 11:20:27,127][62782] Num frames 2700...
+[2023-06-21 11:20:28,015][62782] Num frames 2800...
+[2023-06-21 11:20:28,859][62782] Num frames 2900...
+[2023-06-21 11:20:29,781][62782] Num frames 3000...
+[2023-06-21 11:20:30,646][62782] Num frames 3100...
+[2023-06-21 11:20:31,556][62782] Num frames 3200...
+[2023-06-21 11:20:32,413][62782] Num frames 3300...
+[2023-06-21 11:20:33,280][62782] Num frames 3400...
+[2023-06-21 11:20:33,726][62782] Avg episode rewards: #0: 24.460, true rewards: #0: 11.460
+[2023-06-21 11:20:33,726][62782] Avg episode reward: 24.460, avg true_objective: 11.460
+[2023-06-21 11:20:34,281][62782] Num frames 3500...
+[2023-06-21 11:20:35,123][62782] Num frames 3600...
+[2023-06-21 11:20:36,028][62782] Num frames 3700...
+[2023-06-21 11:20:36,910][62782] Num frames 3800...
+[2023-06-21 11:20:37,497][62782] Avg episode rewards: #0: 19.885, true rewards: #0: 9.635
+[2023-06-21 11:20:37,499][62782] Avg episode reward: 19.885, avg true_objective: 9.635
+[2023-06-21 11:20:37,902][62782] Num frames 3900...
+[2023-06-21 11:20:38,810][62782] Num frames 4000...
+[2023-06-21 11:20:39,699][62782] Num frames 4100...
+[2023-06-21 11:20:40,594][62782] Num frames 4200...
+[2023-06-21 11:20:41,481][62782] Num frames 4300...
+[2023-06-21 11:20:42,363][62782] Num frames 4400...
+[2023-06-21 11:20:43,269][62782] Num frames 4500...
+[2023-06-21 11:20:43,596][62782] Avg episode rewards: #0: 18.852, true rewards: #0: 9.052
+[2023-06-21 11:20:43,598][62782] Avg episode reward: 18.852, avg true_objective: 9.052
+[2023-06-21 11:20:44,264][62782] Num frames 4600...
+[2023-06-21 11:20:45,114][62782] Num frames 4700...
+[2023-06-21 11:20:45,970][62782] Num frames 4800...
+[2023-06-21 11:20:46,884][62782] Num frames 4900...
+[2023-06-21 11:20:47,764][62782] Num frames 5000...
+[2023-06-21 11:20:48,626][62782] Num frames 5100...
+[2023-06-21 11:20:49,547][62782] Num frames 5200...
+[2023-06-21 11:20:50,453][62782] Num frames 5300...
+[2023-06-21 11:20:51,334][62782] Num frames 5400...
+[2023-06-21 11:20:51,976][62782] Avg episode rewards: #0: 20.600, true rewards: #0: 9.100
+[2023-06-21 11:20:51,977][62782] Avg episode reward: 20.600, avg true_objective: 9.100
+[2023-06-21 11:20:52,330][62782] Num frames 5500...
+[2023-06-21 11:20:53,183][62782] Num frames 5600...
+[2023-06-21 11:20:54,063][62782] Num frames 5700...
+[2023-06-21 11:20:54,947][62782] Num frames 5800...
+[2023-06-21 11:20:55,819][62782] Num frames 5900...
+[2023-06-21 11:20:56,278][62782] Avg episode rewards: #0: 18.771, true rewards: #0: 8.486
+[2023-06-21 11:20:56,280][62782] Avg episode reward: 18.771, avg true_objective: 8.486
+[2023-06-21 11:20:56,814][62782] Num frames 6000...
+[2023-06-21 11:20:57,672][62782] Num frames 6100...
+[2023-06-21 11:20:58,528][62782] Num frames 6200...
+[2023-06-21 11:20:59,432][62782] Num frames 6300...
+[2023-06-21 11:21:00,338][62782] Num frames 6400...
+[2023-06-21 11:21:01,188][62782] Num frames 6500...
+[2023-06-21 11:21:02,100][62782] Num frames 6600...
+[2023-06-21 11:21:02,969][62782] Num frames 6700...
+[2023-06-21 11:21:03,877][62782] Num frames 6800...
+[2023-06-21 11:21:04,795][62782] Num frames 6900...
+[2023-06-21 11:21:05,687][62782] Num frames 7000...
+[2023-06-21 11:21:06,589][62782] Num frames 7100...
+[2023-06-21 11:21:07,487][62782] Num frames 7200...
+[2023-06-21 11:21:08,374][62782] Num frames 7300...
+[2023-06-21 11:21:08,804][62782] Avg episode rewards: #0: 21.171, true rewards: #0: 9.171
+[2023-06-21 11:21:08,807][62782] Avg episode reward: 21.171, avg true_objective: 9.171
+[2023-06-21 11:21:09,373][62782] Num frames 7400...
+[2023-06-21 11:21:10,275][62782] Num frames 7500...
+[2023-06-21 11:21:11,165][62782] Num frames 7600...
+[2023-06-21 11:21:12,049][62782] Num frames 7700...
+[2023-06-21 11:21:12,943][62782] Num frames 7800...
+[2023-06-21 11:21:13,831][62782] Num frames 7900...
+[2023-06-21 11:21:14,636][62782] Avg episode rewards: #0: 20.530, true rewards: #0: 8.863
+[2023-06-21 11:21:14,637][62782] Avg episode reward: 20.530, avg true_objective: 8.863
+[2023-06-21 11:21:14,835][62782] Num frames 8000...
+[2023-06-21 11:21:15,696][62782] Num frames 8100...
+[2023-06-21 11:21:16,576][62782] Num frames 8200...
+[2023-06-21 11:21:17,499][62782] Num frames 8300...
+[2023-06-21 11:21:18,384][62782] Num frames 8400...
+[2023-06-21 11:21:19,276][62782] Num frames 8500...
+[2023-06-21 11:21:20,150][62782] Num frames 8600...
+[2023-06-21 11:21:20,939][62782] Num frames 8700...
+[2023-06-21 11:21:21,716][62782] Num frames 8800...
+[2023-06-21 11:21:22,488][62782] Num frames 8900...
+[2023-06-21 11:21:22,998][62782] Avg episode rewards: #0: 21.054, true rewards: #0: 8.954
+[2023-06-21 11:21:22,998][62782] Avg episode reward: 21.054, avg true_objective: 8.954
+[2023-06-21 11:21:35,304][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4!
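The "Avg episode rewards" lines printed during the evaluation runs above are running means over the episodes completed so far, which is why they move after every episode. A short sketch plus a worked check (the incremental bookkeeping below is an assumption for illustration, but it reproduces the logged figures):

def running_means(episode_rewards):
    # Mean over the first n episodes, for every prefix of the reward list.
    means, total = [], 0.0
    for n, reward in enumerate(episode_rewards, start=1):
        total += reward
        means.append(total / n)
    return means

# Final run: avg 3.840 after episode 1 and 23.200 after episode 2, so episode 2
# alone must have scored 2 * 23.200 - 3.840 = 42.560.
print(running_means([3.840, 42.560]))  # [3.84, 23.2]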