diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1988 @@ +[2023-06-20 23:12:18,875][33484] Saving configuration to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json... +[2023-06-20 23:12:18,897][33484] Rollout worker 0 uses device cpu +[2023-06-20 23:12:18,898][33484] Rollout worker 1 uses device cpu +[2023-06-20 23:12:18,898][33484] Rollout worker 2 uses device cpu +[2023-06-20 23:12:18,899][33484] Rollout worker 3 uses device cpu +[2023-06-20 23:12:18,899][33484] Rollout worker 4 uses device cpu +[2023-06-20 23:12:18,899][33484] Rollout worker 5 uses device cpu +[2023-06-20 23:12:18,900][33484] Rollout worker 6 uses device cpu +[2023-06-20 23:12:18,900][33484] Rollout worker 7 uses device cpu +[2023-06-20 23:13:38,093][33484] Environment doom_basic already registered, overwriting... +[2023-06-20 23:13:38,095][33484] Environment doom_two_colors_easy already registered, overwriting... +[2023-06-20 23:13:38,096][33484] Environment doom_two_colors_hard already registered, overwriting... +[2023-06-20 23:13:38,097][33484] Environment doom_dm already registered, overwriting... +[2023-06-20 23:13:38,098][33484] Environment doom_dwango5 already registered, overwriting... +[2023-06-20 23:13:38,098][33484] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-06-20 23:13:38,099][33484] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-06-20 23:13:38,100][33484] Environment doom_my_way_home already registered, overwriting... +[2023-06-20 23:13:38,100][33484] Environment doom_deadly_corridor already registered, overwriting... +[2023-06-20 23:13:38,101][33484] Environment doom_defend_the_center already registered, overwriting... +[2023-06-20 23:13:38,102][33484] Environment doom_defend_the_line already registered, overwriting... +[2023-06-20 23:13:38,103][33484] Environment doom_health_gathering already registered, overwriting... +[2023-06-20 23:13:38,103][33484] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-06-20 23:13:38,104][33484] Environment doom_battle already registered, overwriting... +[2023-06-20 23:13:38,104][33484] Environment doom_battle2 already registered, overwriting... +[2023-06-20 23:13:38,105][33484] Environment doom_duel_bots already registered, overwriting... +[2023-06-20 23:13:38,108][33484] Environment doom_deathmatch_bots already registered, overwriting... +[2023-06-20 23:13:38,109][33484] Environment doom_duel already registered, overwriting... +[2023-06-20 23:13:38,109][33484] Environment doom_deathmatch_full already registered, overwriting... +[2023-06-20 23:13:38,110][33484] Environment doom_benchmark already registered, overwriting... +[2023-06-20 23:13:38,110][33484] register_encoder_factory: +[2023-06-20 23:13:38,146][33484] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-20 23:13:38,151][33484] Experiment dir /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment already exists! +[2023-06-20 23:13:38,152][33484] Resuming existing experiment from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment... 
+[2023-06-20 23:13:38,152][33484] Weights and Biases integration disabled +[2023-06-20 23:13:38,155][33484] Environment var CUDA_VISIBLE_DEVICES is + +[2023-06-20 23:13:38,890][33484] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/Users/md/Code/python/jubilant-memory/RL/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=4000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=02ef46648112e6fd3adc4475dfd889e784c0ef87 +git_repo_name=https://github.com/mihirdeo16/jubilant-memory.git +[2023-06-20 23:13:38,891][33484] Saving configuration to 
/Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json... +[2023-06-20 23:13:38,904][33484] Rollout worker 0 uses device cpu +[2023-06-20 23:13:38,905][33484] Rollout worker 1 uses device cpu +[2023-06-20 23:13:38,906][33484] Rollout worker 2 uses device cpu +[2023-06-20 23:13:38,906][33484] Rollout worker 3 uses device cpu +[2023-06-20 23:13:38,906][33484] Rollout worker 4 uses device cpu +[2023-06-20 23:13:38,906][33484] Rollout worker 5 uses device cpu +[2023-06-20 23:13:38,907][33484] Rollout worker 6 uses device cpu +[2023-06-20 23:13:38,907][33484] Rollout worker 7 uses device cpu +[2023-06-20 23:17:03,197][33484] Environment doom_basic already registered, overwriting... +[2023-06-20 23:17:03,199][33484] Environment doom_two_colors_easy already registered, overwriting... +[2023-06-20 23:17:03,200][33484] Environment doom_two_colors_hard already registered, overwriting... +[2023-06-20 23:17:03,201][33484] Environment doom_dm already registered, overwriting... +[2023-06-20 23:17:03,202][33484] Environment doom_dwango5 already registered, overwriting... +[2023-06-20 23:17:03,203][33484] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-06-20 23:17:03,203][33484] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-06-20 23:17:03,204][33484] Environment doom_my_way_home already registered, overwriting... +[2023-06-20 23:17:03,205][33484] Environment doom_deadly_corridor already registered, overwriting... +[2023-06-20 23:17:03,206][33484] Environment doom_defend_the_center already registered, overwriting... +[2023-06-20 23:17:03,206][33484] Environment doom_defend_the_line already registered, overwriting... +[2023-06-20 23:17:03,207][33484] Environment doom_health_gathering already registered, overwriting... +[2023-06-20 23:17:03,208][33484] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-06-20 23:17:03,208][33484] Environment doom_battle already registered, overwriting... +[2023-06-20 23:17:03,209][33484] Environment doom_battle2 already registered, overwriting... +[2023-06-20 23:17:03,210][33484] Environment doom_duel_bots already registered, overwriting... +[2023-06-20 23:17:03,210][33484] Environment doom_deathmatch_bots already registered, overwriting... +[2023-06-20 23:17:03,210][33484] Environment doom_duel already registered, overwriting... +[2023-06-20 23:17:03,211][33484] Environment doom_deathmatch_full already registered, overwriting... +[2023-06-20 23:17:03,211][33484] Environment doom_benchmark already registered, overwriting... +[2023-06-20 23:17:03,212][33484] register_encoder_factory: +[2023-06-20 23:17:03,251][33484] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-20 23:17:03,257][33484] Experiment dir /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment already exists! +[2023-06-20 23:17:03,259][33484] Resuming existing experiment from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment... 
+[2023-06-20 23:17:03,260][33484] Weights and Biases integration disabled +[2023-06-20 23:17:03,264][33484] Environment var CUDA_VISIBLE_DEVICES is + +[2023-06-20 23:17:04,047][33484] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/Users/md/Code/python/jubilant-memory/RL/train_dir +restart_behavior=resume +device=cpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=4000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=02ef46648112e6fd3adc4475dfd889e784c0ef87 +git_repo_name=https://github.com/mihirdeo16/jubilant-memory.git +[2023-06-20 23:17:04,048][33484] Saving configuration to 
/Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json... +[2023-06-20 23:17:04,060][33484] Rollout worker 0 uses device cpu +[2023-06-20 23:17:04,061][33484] Rollout worker 1 uses device cpu +[2023-06-20 23:17:04,062][33484] Rollout worker 2 uses device cpu +[2023-06-20 23:17:04,063][33484] Rollout worker 3 uses device cpu +[2023-06-20 23:17:04,063][33484] Rollout worker 4 uses device cpu +[2023-06-20 23:17:04,063][33484] Rollout worker 5 uses device cpu +[2023-06-20 23:17:04,064][33484] Rollout worker 6 uses device cpu +[2023-06-20 23:17:04,064][33484] Rollout worker 7 uses device cpu +[2023-06-20 23:17:04,103][33484] InferenceWorker_p0-w0: min num requests: 2 +[2023-06-20 23:17:04,139][33484] Starting all processes... +[2023-06-20 23:17:04,139][33484] Starting process learner_proc0 +[2023-06-20 23:17:04,193][33484] Starting all processes... +[2023-06-20 23:17:04,196][33484] Starting process inference_proc0-0 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc0 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc1 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc2 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc3 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc4 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc5 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc6 +[2023-06-20 23:17:04,197][33484] Starting process rollout_proc7 +[2023-06-20 23:17:06,218][33882] On MacOS, not setting affinity +[2023-06-20 23:17:06,227][33878] Starting seed is not provided +[2023-06-20 23:17:06,228][33878] Initializing actor-critic model on device cpu +[2023-06-20 23:17:06,228][33878] RunningMeanStd input shape: (3, 72, 128) +[2023-06-20 23:17:06,229][33878] RunningMeanStd input shape: (1,) +[2023-06-20 23:17:06,245][33878] ConvEncoder: input_channels=3 +[2023-06-20 23:17:06,257][33883] On MacOS, not setting affinity +[2023-06-20 23:17:06,257][33884] On MacOS, not setting affinity +[2023-06-20 23:17:06,261][33888] On MacOS, not setting affinity +[2023-06-20 23:17:06,276][33885] On MacOS, not setting affinity +[2023-06-20 23:17:06,276][33880] On MacOS, not setting affinity +[2023-06-20 23:17:06,283][33881] On MacOS, not setting affinity +[2023-06-20 23:17:06,310][33887] On MacOS, not setting affinity +[2023-06-20 23:17:06,329][33878] Conv encoder output size: 512 +[2023-06-20 23:17:06,329][33878] Policy head output size: 512 +[2023-06-20 23:17:06,345][33878] Created Actor Critic model with architecture: +[2023-06-20 23:17:06,345][33878] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): 
ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-06-20 23:17:06,348][33878] Using optimizer +[2023-06-20 23:17:06,348][33878] No checkpoints found +[2023-06-20 23:17:06,348][33878] Did not load from checkpoint, starting from scratch! +[2023-06-20 23:17:06,349][33878] Initialized policy 0 weights for model version 0 +[2023-06-20 23:17:06,350][33878] LearnerWorker_p0 finished initialization! +[2023-06-20 23:17:06,351][33879] RunningMeanStd input shape: (3, 72, 128) +[2023-06-20 23:17:06,351][33879] RunningMeanStd input shape: (1,) +[2023-06-20 23:17:06,358][33879] ConvEncoder: input_channels=3 +[2023-06-20 23:17:06,423][33879] Conv encoder output size: 512 +[2023-06-20 23:17:06,424][33879] Policy head output size: 512 +[2023-06-20 23:17:06,431][33484] Inference worker 0-0 is ready! +[2023-06-20 23:17:06,433][33484] All inference workers are ready! Signal rollout workers to start! +[2023-06-20 23:17:06,468][33883] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,470][33888] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,471][33880] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,475][33885] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,475][33884] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,476][33887] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,479][33882] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:06,481][33881] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:17:08,268][33484] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-20 23:17:08,618][33881] Decorrelating experience for 0 frames... +[2023-06-20 23:17:08,618][33882] Decorrelating experience for 0 frames... +[2023-06-20 23:17:08,627][33885] Decorrelating experience for 0 frames... +[2023-06-20 23:17:08,632][33880] Decorrelating experience for 0 frames... +[2023-06-20 23:17:08,634][33888] Decorrelating experience for 0 frames... +[2023-06-20 23:17:09,679][33882] Decorrelating experience for 32 frames... +[2023-06-20 23:17:09,679][33880] Decorrelating experience for 32 frames... +[2023-06-20 23:17:09,679][33885] Decorrelating experience for 32 frames... +[2023-06-20 23:17:09,679][33881] Decorrelating experience for 32 frames... +[2023-06-20 23:17:09,699][33887] Decorrelating experience for 0 frames... +[2023-06-20 23:17:09,700][33884] Decorrelating experience for 0 frames... +[2023-06-20 23:17:10,538][33884] Decorrelating experience for 32 frames... +[2023-06-20 23:17:10,538][33887] Decorrelating experience for 32 frames... +[2023-06-20 23:17:10,541][33888] Decorrelating experience for 32 frames... +[2023-06-20 23:17:11,454][33880] Decorrelating experience for 64 frames... +[2023-06-20 23:17:11,455][33881] Decorrelating experience for 64 frames... +[2023-06-20 23:17:11,455][33885] Decorrelating experience for 64 frames... +[2023-06-20 23:17:11,455][33882] Decorrelating experience for 64 frames... +[2023-06-20 23:17:12,306][33883] Decorrelating experience for 0 frames... 
+[2023-06-20 23:17:12,309][33884] Decorrelating experience for 64 frames... +[2023-06-20 23:17:12,309][33887] Decorrelating experience for 64 frames... +[2023-06-20 23:17:13,079][33883] Decorrelating experience for 32 frames... +[2023-06-20 23:17:13,079][33888] Decorrelating experience for 64 frames... +[2023-06-20 23:17:13,269][33484] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-20 23:17:13,916][33880] Decorrelating experience for 96 frames... +[2023-06-20 23:17:13,917][33885] Decorrelating experience for 96 frames... +[2023-06-20 23:17:13,922][33882] Decorrelating experience for 96 frames... +[2023-06-20 23:17:14,809][33887] Decorrelating experience for 96 frames... +[2023-06-20 23:17:14,815][33883] Decorrelating experience for 64 frames... +[2023-06-20 23:17:14,816][33884] Decorrelating experience for 96 frames... +[2023-06-20 23:17:14,836][33881] Decorrelating experience for 96 frames... +[2023-06-20 23:17:15,458][33888] Decorrelating experience for 96 frames... +[2023-06-20 23:17:17,109][33883] Decorrelating experience for 96 frames... +[2023-06-20 23:17:18,268][33484] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-20 23:17:18,269][33484] Avg episode reward: [(0, '0.320')] +[2023-06-20 23:17:23,269][33484] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 89.6. Samples: 1344. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-20 23:17:23,271][33484] Avg episode reward: [(0, '1.377')] +[2023-06-20 23:17:24,087][33484] Heartbeat connected on Batcher_0 +[2023-06-20 23:17:24,121][33484] Heartbeat connected on InferenceWorker_p0-w0 +[2023-06-20 23:17:24,171][33484] Heartbeat connected on RolloutWorker_w2 +[2023-06-20 23:17:24,181][33484] Heartbeat connected on RolloutWorker_w0 +[2023-06-20 23:17:24,198][33484] Heartbeat connected on RolloutWorker_w4 +[2023-06-20 23:17:24,199][33484] Heartbeat connected on RolloutWorker_w1 +[2023-06-20 23:17:24,203][33484] Heartbeat connected on RolloutWorker_w5 +[2023-06-20 23:17:24,213][33484] Heartbeat connected on RolloutWorker_w3 +[2023-06-20 23:17:24,222][33484] Heartbeat connected on RolloutWorker_w6 +[2023-06-20 23:17:24,267][33484] Heartbeat connected on RolloutWorker_w7 +[2023-06-20 23:17:25,027][33484] Heartbeat connected on LearnerWorker_p0 +[2023-06-20 23:17:28,268][33484] Fps is (10 sec: 819.2, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 8192. Throughput: 0: 162.3. Samples: 3246. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-06-20 23:17:28,270][33484] Avg episode reward: [(0, '2.748')] +[2023-06-20 23:17:33,268][33484] Fps is (10 sec: 1228.9, 60 sec: 491.5, 300 sec: 491.5). Total num frames: 12288. Throughput: 0: 167.7. Samples: 4192. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-20 23:17:33,271][33484] Avg episode reward: [(0, '3.151')] +[2023-06-20 23:17:38,269][33484] Fps is (10 sec: 1228.7, 60 sec: 682.6, 300 sec: 682.6). Total num frames: 20480. Throughput: 0: 203.3. Samples: 6098. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:17:38,270][33484] Avg episode reward: [(0, '3.580')] +[2023-06-20 23:17:43,269][33484] Fps is (10 sec: 1228.8, 60 sec: 702.2, 300 sec: 702.2). Total num frames: 24576. Throughput: 0: 228.2. Samples: 7986. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:17:43,271][33484] Avg episode reward: [(0, '3.854')] +[2023-06-20 23:17:48,269][33484] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 32768. Throughput: 0: 223.5. Samples: 8940. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:17:48,271][33484] Avg episode reward: [(0, '3.929')] +[2023-06-20 23:17:53,268][33484] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 36864. Throughput: 0: 239.8. Samples: 10790. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-20 23:17:53,271][33484] Avg episode reward: [(0, '4.252')] +[2023-06-20 23:17:53,993][33879] Updated weights for policy 0, policy_version 10 (0.0018) +[2023-06-20 23:17:58,268][33484] Fps is (10 sec: 1228.8, 60 sec: 901.1, 300 sec: 901.1). Total num frames: 45056. Throughput: 0: 281.9. Samples: 12684. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-20 23:17:58,269][33484] Avg episode reward: [(0, '4.445')] +[2023-06-20 23:18:03,269][33484] Fps is (10 sec: 1228.7, 60 sec: 893.6, 300 sec: 893.6). Total num frames: 49152. Throughput: 0: 302.9. Samples: 13630. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) +[2023-06-20 23:18:03,272][33484] Avg episode reward: [(0, '4.487')] +[2023-06-20 23:18:08,269][33484] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 955.7). Total num frames: 57344. Throughput: 0: 314.9. Samples: 15514. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:08,270][33484] Avg episode reward: [(0, '4.483')] +[2023-06-20 23:18:13,270][33484] Fps is (10 sec: 1228.7, 60 sec: 1024.0, 300 sec: 945.2). Total num frames: 61440. Throughput: 0: 314.6. Samples: 17404. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:13,275][33484] Avg episode reward: [(0, '4.386')] +[2023-06-20 23:18:18,265][33484] Fps is (10 sec: 1229.3, 60 sec: 1160.6, 300 sec: 994.8). Total num frames: 69632. Throughput: 0: 314.5. Samples: 18344. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:18,266][33484] Avg episode reward: [(0, '4.367')] +[2023-06-20 23:18:23,270][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 983.0). Total num frames: 73728. Throughput: 0: 314.3. Samples: 20242. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:23,273][33484] Avg episode reward: [(0, '4.376')] +[2023-06-20 23:18:26,473][33879] Updated weights for policy 0, policy_version 20 (0.0012) +[2023-06-20 23:18:28,267][33484] Fps is (10 sec: 1228.5, 60 sec: 1228.8, 300 sec: 1024.0). Total num frames: 81920. Throughput: 0: 314.6. Samples: 22142. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:28,268][33484] Avg episode reward: [(0, '4.513')] +[2023-06-20 23:18:33,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1011.9). Total num frames: 86016. Throughput: 0: 314.1. Samples: 23074. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:33,273][33484] Avg episode reward: [(0, '4.401')] +[2023-06-20 23:18:38,269][33484] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1046.7). Total num frames: 94208. Throughput: 0: 314.8. Samples: 24956. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:38,269][33484] Avg episode reward: [(0, '4.401')] +[2023-06-20 23:18:43,269][33484] Fps is (10 sec: 1638.4, 60 sec: 1297.1, 300 sec: 1077.9). Total num frames: 102400. Throughput: 0: 314.8. Samples: 26852. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:43,271][33484] Avg episode reward: [(0, '4.443')] +[2023-06-20 23:18:43,273][33878] Saving new best policy, reward=4.443! +[2023-06-20 23:18:48,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1065.0). Total num frames: 106496. Throughput: 0: 314.7. Samples: 27792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:48,269][33484] Avg episode reward: [(0, '4.453')] +[2023-06-20 23:18:49,238][33878] Saving new best policy, reward=4.453! +[2023-06-20 23:18:53,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1092.3). Total num frames: 114688. Throughput: 0: 315.3. Samples: 29704. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:53,270][33484] Avg episode reward: [(0, '4.491')] +[2023-06-20 23:18:53,273][33878] Saving new best policy, reward=4.491! +[2023-06-20 23:18:58,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1079.8). Total num frames: 118784. Throughput: 0: 315.4. Samples: 31596. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:18:58,271][33484] Avg episode reward: [(0, '4.466')] +[2023-06-20 23:18:58,923][33879] Updated weights for policy 0, policy_version 30 (0.0009) +[2023-06-20 23:19:03,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1104.1). Total num frames: 126976. Throughput: 0: 315.7. Samples: 32550. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:03,269][33484] Avg episode reward: [(0, '4.545')] +[2023-06-20 23:19:03,272][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000031_126976.pth... +[2023-06-20 23:19:03,345][33878] Saving new best policy, reward=4.545! +[2023-06-20 23:19:08,267][33484] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1092.3). Total num frames: 131072. Throughput: 0: 315.5. Samples: 34438. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:08,269][33484] Avg episode reward: [(0, '4.547')] +[2023-06-20 23:19:08,626][33878] Saving new best policy, reward=4.547! +[2023-06-20 23:19:13,269][33484] Fps is (10 sec: 1228.6, 60 sec: 1297.1, 300 sec: 1114.1). Total num frames: 139264. Throughput: 0: 315.5. Samples: 36342. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:13,272][33484] Avg episode reward: [(0, '4.547')] +[2023-06-20 23:19:18,268][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1102.8). Total num frames: 143360. Throughput: 0: 315.6. Samples: 37276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:18,269][33484] Avg episode reward: [(0, '4.501')] +[2023-06-20 23:19:23,264][33484] Fps is (10 sec: 1229.4, 60 sec: 1297.2, 300 sec: 1122.6). Total num frames: 151552. Throughput: 0: 316.2. Samples: 39182. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:23,265][33484] Avg episode reward: [(0, '4.473')] +[2023-06-20 23:19:28,269][33484] Fps is (10 sec: 1638.2, 60 sec: 1297.0, 300 sec: 1141.0). Total num frames: 159744. Throughput: 0: 316.1. Samples: 41078. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:28,271][33484] Avg episode reward: [(0, '4.395')] +[2023-06-20 23:19:31,238][33879] Updated weights for policy 0, policy_version 40 (0.0007) +[2023-06-20 23:19:33,267][33484] Fps is (10 sec: 1228.4, 60 sec: 1297.1, 300 sec: 1129.9). Total num frames: 163840. Throughput: 0: 316.4. Samples: 42028. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:33,268][33484] Avg episode reward: [(0, '4.513')] +[2023-06-20 23:19:38,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1146.9). Total num frames: 172032. Throughput: 0: 315.6. Samples: 43906. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:38,271][33484] Avg episode reward: [(0, '4.432')] +[2023-06-20 23:19:43,267][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1136.3). Total num frames: 176128. Throughput: 0: 315.8. Samples: 45806. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:43,268][33484] Avg episode reward: [(0, '4.455')] +[2023-06-20 23:19:48,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1152.0). Total num frames: 184320. Throughput: 0: 315.5. Samples: 46748. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:48,270][33484] Avg episode reward: [(0, '4.602')] +[2023-06-20 23:19:48,271][33878] Saving new best policy, reward=4.602! +[2023-06-20 23:19:53,268][33484] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1141.9). Total num frames: 188416. Throughput: 0: 315.8. Samples: 48650. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:53,269][33484] Avg episode reward: [(0, '4.524')] +[2023-06-20 23:19:58,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1156.5). Total num frames: 196608. Throughput: 0: 315.5. Samples: 50540. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:19:58,272][33484] Avg episode reward: [(0, '4.580')] +[2023-06-20 23:20:03,268][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1146.9). Total num frames: 200704. Throughput: 0: 315.7. Samples: 51482. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:03,269][33484] Avg episode reward: [(0, '4.525')] +[2023-06-20 23:20:03,695][33879] Updated weights for policy 0, policy_version 50 (0.0007) +[2023-06-20 23:20:08,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1160.5). Total num frames: 208896. Throughput: 0: 315.3. Samples: 53372. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:08,272][33484] Avg episode reward: [(0, '4.513')] +[2023-06-20 23:20:13,268][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1151.3). Total num frames: 212992. Throughput: 0: 315.2. Samples: 55262. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:13,269][33484] Avg episode reward: [(0, '4.492')] +[2023-06-20 23:20:18,267][33484] Fps is (10 sec: 1229.0, 60 sec: 1297.1, 300 sec: 1164.1). Total num frames: 221184. Throughput: 0: 315.1. Samples: 56206. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:18,269][33484] Avg episode reward: [(0, '4.351')] +[2023-06-20 23:20:23,265][33484] Fps is (10 sec: 1638.9, 60 sec: 1297.1, 300 sec: 1176.3). Total num frames: 229376. Throughput: 0: 315.3. Samples: 58092. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:23,266][33484] Avg episode reward: [(0, '4.348')] +[2023-06-20 23:20:28,267][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1167.4). Total num frames: 233472. Throughput: 0: 315.1. Samples: 59984. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:28,268][33484] Avg episode reward: [(0, '4.215')] +[2023-06-20 23:20:33,269][33484] Fps is (10 sec: 1228.3, 60 sec: 1297.0, 300 sec: 1178.8). Total num frames: 241664. Throughput: 0: 315.5. Samples: 60944. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:33,271][33484] Avg episode reward: [(0, '4.299')] +[2023-06-20 23:20:36,073][33879] Updated weights for policy 0, policy_version 60 (0.0008) +[2023-06-20 23:20:38,268][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1170.3). Total num frames: 245760. Throughput: 0: 314.9. Samples: 62822. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:20:38,269][33484] Avg episode reward: [(0, '4.272')] +[2023-06-20 23:20:43,269][33484] Fps is (10 sec: 1228.9, 60 sec: 1297.0, 300 sec: 1181.2). Total num frames: 253952. Throughput: 0: 315.2. Samples: 64726. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:20:43,270][33484] Avg episode reward: [(0, '4.276')] +[2023-06-20 23:20:48,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1172.9). Total num frames: 258048. Throughput: 0: 315.2. Samples: 65668. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:20:48,270][33484] Avg episode reward: [(0, '4.377')] +[2023-06-20 23:20:53,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1183.3). Total num frames: 266240. Throughput: 0: 315.3. Samples: 67560. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:20:53,270][33484] Avg episode reward: [(0, '4.438')] +[2023-06-20 23:20:58,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1175.4). Total num frames: 270336. Throughput: 0: 315.3. Samples: 69452. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:20:58,271][33484] Avg episode reward: [(0, '4.599')] +[2023-06-20 23:21:03,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1185.2). Total num frames: 278528. Throughput: 0: 315.3. Samples: 70396. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:03,270][33484] Avg episode reward: [(0, '4.480')] +[2023-06-20 23:21:03,273][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000068_278528.pth... +[2023-06-20 23:21:08,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1177.6). Total num frames: 282624. Throughput: 0: 315.4. Samples: 72286. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:08,271][33484] Avg episode reward: [(0, '4.403')] +[2023-06-20 23:21:08,421][33879] Updated weights for policy 0, policy_version 70 (0.0007) +[2023-06-20 23:21:13,266][33484] Fps is (10 sec: 1229.2, 60 sec: 1297.1, 300 sec: 1187.0). Total num frames: 290816. Throughput: 0: 315.7. Samples: 74188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:13,267][33484] Avg episode reward: [(0, '4.389')] +[2023-06-20 23:21:18,268][33484] Fps is (10 sec: 1638.5, 60 sec: 1297.0, 300 sec: 1196.0). Total num frames: 299008. Throughput: 0: 315.3. Samples: 75132. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:18,270][33484] Avg episode reward: [(0, '4.296')] +[2023-06-20 23:21:23,269][33484] Fps is (10 sec: 1228.4, 60 sec: 1228.7, 300 sec: 1188.6). Total num frames: 303104. Throughput: 0: 315.5. Samples: 77020. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:23,270][33484] Avg episode reward: [(0, '4.310')] +[2023-06-20 23:21:28,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1297.0, 300 sec: 1197.3). Total num frames: 311296. Throughput: 0: 315.1. Samples: 78904. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:28,271][33484] Avg episode reward: [(0, '4.228')] +[2023-06-20 23:21:33,266][33484] Fps is (10 sec: 1229.2, 60 sec: 1228.9, 300 sec: 1190.2). 
Total num frames: 315392. Throughput: 0: 315.4. Samples: 79860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:33,267][33484] Avg episode reward: [(0, '4.228')] +[2023-06-20 23:21:38,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.0, 300 sec: 1198.5). Total num frames: 323584. Throughput: 0: 315.4. Samples: 81752. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:38,271][33484] Avg episode reward: [(0, '4.380')] +[2023-06-20 23:21:40,859][33879] Updated weights for policy 0, policy_version 80 (0.0008) +[2023-06-20 23:21:43,269][33484] Fps is (10 sec: 1228.4, 60 sec: 1228.8, 300 sec: 1191.6). Total num frames: 327680. Throughput: 0: 315.5. Samples: 83650. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:43,270][33484] Avg episode reward: [(0, '4.441')] +[2023-06-20 23:21:48,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1199.5). Total num frames: 335872. Throughput: 0: 315.5. Samples: 84592. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:48,270][33484] Avg episode reward: [(0, '4.444')] +[2023-06-20 23:21:53,269][33484] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1192.9). Total num frames: 339968. Throughput: 0: 315.6. Samples: 86488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:53,270][33484] Avg episode reward: [(0, '4.346')] +[2023-06-20 23:21:58,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1200.5). Total num frames: 348160. Throughput: 0: 315.4. Samples: 88384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:21:58,270][33484] Avg episode reward: [(0, '4.327')] +[2023-06-20 23:22:03,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1194.1). Total num frames: 352256. Throughput: 0: 315.5. Samples: 89332. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:03,272][33484] Avg episode reward: [(0, '4.296')] +[2023-06-20 23:22:08,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1221.9). Total num frames: 360448. Throughput: 0: 315.4. Samples: 91214. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:08,270][33484] Avg episode reward: [(0, '4.320')] +[2023-06-20 23:22:13,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.7, 300 sec: 1235.7). Total num frames: 364544. Throughput: 0: 315.6. Samples: 93108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:13,271][33484] Avg episode reward: [(0, '4.229')] +[2023-06-20 23:22:13,490][33879] Updated weights for policy 0, policy_version 90 (0.0008) +[2023-06-20 23:22:18,267][33484] Fps is (10 sec: 1229.0, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 372736. Throughput: 0: 315.4. Samples: 94054. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:18,269][33484] Avg episode reward: [(0, '4.350')] +[2023-06-20 23:22:23,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 376832. Throughput: 0: 315.1. Samples: 95930. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:23,272][33484] Avg episode reward: [(0, '4.455')] +[2023-06-20 23:22:28,268][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 385024. Throughput: 0: 315.1. Samples: 97830. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:28,269][33484] Avg episode reward: [(0, '4.508')] +[2023-06-20 23:22:33,267][33484] Fps is (10 sec: 1638.7, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 393216. Throughput: 0: 315.3. Samples: 98778. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:22:33,269][33484] Avg episode reward: [(0, '4.591')] +[2023-06-20 23:22:38,268][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 397312. Throughput: 0: 315.2. Samples: 100672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:38,270][33484] Avg episode reward: [(0, '4.663')] +[2023-06-20 23:22:39,427][33878] Saving new best policy, reward=4.663! +[2023-06-20 23:22:43,268][33484] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 405504. Throughput: 0: 315.4. Samples: 102576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:43,270][33484] Avg episode reward: [(0, '4.731')] +[2023-06-20 23:22:43,272][33878] Saving new best policy, reward=4.731! +[2023-06-20 23:22:45,801][33879] Updated weights for policy 0, policy_version 100 (0.0009) +[2023-06-20 23:22:48,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 409600. Throughput: 0: 315.3. Samples: 103520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:48,271][33484] Avg episode reward: [(0, '4.652')] +[2023-06-20 23:22:53,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 417792. Throughput: 0: 315.7. Samples: 105420. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:53,271][33484] Avg episode reward: [(0, '4.531')] +[2023-06-20 23:22:58,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 421888. Throughput: 0: 315.7. Samples: 107316. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:22:58,271][33484] Avg episode reward: [(0, '4.567')] +[2023-06-20 23:23:03,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 430080. Throughput: 0: 315.6. Samples: 108258. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:03,270][33484] Avg episode reward: [(0, '4.580')] +[2023-06-20 23:23:03,273][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000105_430080.pth... +[2023-06-20 23:23:03,348][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000031_126976.pth +[2023-06-20 23:23:08,269][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 434176. Throughput: 0: 316.1. Samples: 110154. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:08,270][33484] Avg episode reward: [(0, '4.538')] +[2023-06-20 23:23:13,266][33484] Fps is (10 sec: 1229.1, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 442368. Throughput: 0: 315.9. Samples: 112044. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:13,268][33484] Avg episode reward: [(0, '4.424')] +[2023-06-20 23:23:18,248][33879] Updated weights for policy 0, policy_version 110 (0.0010) +[2023-06-20 23:23:18,269][33484] Fps is (10 sec: 1638.3, 60 sec: 1297.0, 300 sec: 1277.4). Total num frames: 450560. Throughput: 0: 315.8. Samples: 112990. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:18,270][33484] Avg episode reward: [(0, '4.473')] +[2023-06-20 23:23:23,266][33484] Fps is (10 sec: 1228.8, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 454656. Throughput: 0: 315.9. Samples: 114888. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:23,267][33484] Avg episode reward: [(0, '4.485')] +[2023-06-20 23:23:28,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1277.4). Total num frames: 462848. Throughput: 0: 315.8. Samples: 116786. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:28,271][33484] Avg episode reward: [(0, '4.600')] +[2023-06-20 23:23:33,267][33484] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 466944. Throughput: 0: 315.8. Samples: 117732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:33,268][33484] Avg episode reward: [(0, '4.509')] +[2023-06-20 23:23:38,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1297.1, 300 sec: 1263.5). Total num frames: 475136. Throughput: 0: 315.7. Samples: 119628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:38,271][33484] Avg episode reward: [(0, '4.518')] +[2023-06-20 23:23:43,268][33484] Fps is (10 sec: 1228.6, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 479232. Throughput: 0: 315.5. Samples: 121512. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:43,270][33484] Avg episode reward: [(0, '4.554')] +[2023-06-20 23:23:48,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1297.0, 300 sec: 1263.5). Total num frames: 487424. Throughput: 0: 314.4. Samples: 122406. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:48,274][33484] Avg episode reward: [(0, '4.537')] +[2023-06-20 23:23:51,399][33879] Updated weights for policy 0, policy_version 120 (0.0008) +[2023-06-20 23:23:53,271][33484] Fps is (10 sec: 1228.5, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 491520. Throughput: 0: 309.6. Samples: 124088. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:53,273][33484] Avg episode reward: [(0, '4.572')] +[2023-06-20 23:23:58,272][33484] Fps is (10 sec: 819.2, 60 sec: 1228.7, 300 sec: 1249.6). Total num frames: 495616. Throughput: 0: 304.7. Samples: 125758. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:23:58,277][33484] Avg episode reward: [(0, '4.463')] +[2023-06-20 23:24:03,267][33484] Fps is (10 sec: 1229.2, 60 sec: 1228.8, 300 sec: 1263.5). Total num frames: 503808. Throughput: 0: 302.4. Samples: 126596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:03,268][33484] Avg episode reward: [(0, '4.453')] +[2023-06-20 23:24:08,270][33484] Fps is (10 sec: 1229.1, 60 sec: 1228.8, 300 sec: 1249.6). Total num frames: 507904. Throughput: 0: 297.6. Samples: 128282. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:08,274][33484] Avg episode reward: [(0, '4.564')] +[2023-06-20 23:24:13,268][33484] Fps is (10 sec: 819.2, 60 sec: 1160.5, 300 sec: 1249.6). Total num frames: 512000. Throughput: 0: 292.6. Samples: 129954. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:24:13,269][33484] Avg episode reward: [(0, '4.550')] +[2023-06-20 23:24:18,272][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1249.6). Total num frames: 520192. Throughput: 0: 290.4. Samples: 130802. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:18,273][33484] Avg episode reward: [(0, '4.604')] +[2023-06-20 23:24:23,273][33484] Fps is (10 sec: 1228.2, 60 sec: 1160.4, 300 sec: 1235.7). Total num frames: 524288. Throughput: 0: 285.7. Samples: 132486. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:24:23,274][33484] Avg episode reward: [(0, '4.614')] +[2023-06-20 23:24:28,001][33879] Updated weights for policy 0, policy_version 130 (0.0008) +[2023-06-20 23:24:28,270][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1249.6). Total num frames: 532480. Throughput: 0: 281.1. Samples: 134160. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:28,271][33484] Avg episode reward: [(0, '4.603')] +[2023-06-20 23:24:33,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.4, 300 sec: 1235.7). Total num frames: 536576. Throughput: 0: 280.0. Samples: 135008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:33,274][33484] Avg episode reward: [(0, '4.556')] +[2023-06-20 23:24:38,273][33484] Fps is (10 sec: 819.0, 60 sec: 1092.2, 300 sec: 1235.7). Total num frames: 540672. Throughput: 0: 279.7. Samples: 136676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:38,275][33484] Avg episode reward: [(0, '4.688')] +[2023-06-20 23:24:43,270][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1235.7). Total num frames: 548864. Throughput: 0: 279.8. Samples: 138350. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:43,272][33484] Avg episode reward: [(0, '4.570')] +[2023-06-20 23:24:48,270][33484] Fps is (10 sec: 1229.1, 60 sec: 1092.3, 300 sec: 1235.7). Total num frames: 552960. Throughput: 0: 279.8. Samples: 139188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:48,272][33484] Avg episode reward: [(0, '4.573')] +[2023-06-20 23:24:53,272][33484] Fps is (10 sec: 819.0, 60 sec: 1092.2, 300 sec: 1221.8). Total num frames: 557056. Throughput: 0: 279.7. Samples: 140868. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:53,274][33484] Avg episode reward: [(0, '4.513')] +[2023-06-20 23:24:58,273][33484] Fps is (10 sec: 1228.5, 60 sec: 1160.5, 300 sec: 1235.7). Total num frames: 565248. Throughput: 0: 279.7. Samples: 142542. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:24:58,274][33484] Avg episode reward: [(0, '4.528')] +[2023-06-20 23:25:03,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.2, 300 sec: 1221.8). Total num frames: 569344. Throughput: 0: 279.7. Samples: 143390. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:03,272][33484] Avg episode reward: [(0, '4.528')] +[2023-06-20 23:25:04,692][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000140_573440.pth... +[2023-06-20 23:25:04,702][33879] Updated weights for policy 0, policy_version 140 (0.0008) +[2023-06-20 23:25:04,755][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000068_278528.pth +[2023-06-20 23:25:08,272][33484] Fps is (10 sec: 819.3, 60 sec: 1092.2, 300 sec: 1221.8). Total num frames: 573440. Throughput: 0: 279.9. Samples: 145080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:08,273][33484] Avg episode reward: [(0, '4.583')] +[2023-06-20 23:25:13,274][33484] Fps is (10 sec: 1228.5, 60 sec: 1160.4, 300 sec: 1221.8). Total num frames: 581632. Throughput: 0: 279.8. Samples: 146754. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:13,275][33484] Avg episode reward: [(0, '4.574')] +[2023-06-20 23:25:18,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1207.9). Total num frames: 585728. Throughput: 0: 279.8. Samples: 147598. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:18,271][33484] Avg episode reward: [(0, '4.532')] +[2023-06-20 23:25:23,271][33484] Fps is (10 sec: 1229.2, 60 sec: 1160.6, 300 sec: 1221.8). Total num frames: 593920. Throughput: 0: 280.2. Samples: 149284. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:23,273][33484] Avg episode reward: [(0, '4.496')] +[2023-06-20 23:25:28,274][33484] Fps is (10 sec: 1228.4, 60 sec: 1092.2, 300 sec: 1208.0). Total num frames: 598016. Throughput: 0: 280.3. Samples: 150964. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:28,275][33484] Avg episode reward: [(0, '4.486')] +[2023-06-20 23:25:33,273][33484] Fps is (10 sec: 819.0, 60 sec: 1092.3, 300 sec: 1208.0). Total num frames: 602112. Throughput: 0: 280.2. Samples: 151796. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:33,277][33484] Avg episode reward: [(0, '4.430')] +[2023-06-20 23:25:38,271][33484] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1208.0). Total num frames: 610304. Throughput: 0: 280.2. Samples: 153476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:38,272][33484] Avg episode reward: [(0, '4.415')] +[2023-06-20 23:25:41,285][33879] Updated weights for policy 0, policy_version 150 (0.0009) +[2023-06-20 23:25:43,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1208.0). Total num frames: 614400. Throughput: 0: 280.3. Samples: 155154. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:43,273][33484] Avg episode reward: [(0, '4.470')] +[2023-06-20 23:25:48,266][33484] Fps is (10 sec: 819.6, 60 sec: 1092.3, 300 sec: 1194.1). Total num frames: 618496. Throughput: 0: 280.2. Samples: 155996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:48,270][33484] Avg episode reward: [(0, '4.459')] +[2023-06-20 23:25:53,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1208.0). Total num frames: 626688. Throughput: 0: 280.0. Samples: 157678. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:53,273][33484] Avg episode reward: [(0, '4.481')] +[2023-06-20 23:25:58,272][33484] Fps is (10 sec: 1228.1, 60 sec: 1092.3, 300 sec: 1194.1). Total num frames: 630784. Throughput: 0: 280.0. Samples: 159352. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:25:58,274][33484] Avg episode reward: [(0, '4.504')] +[2023-06-20 23:26:03,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1208.0). Total num frames: 638976. Throughput: 0: 280.0. Samples: 160198. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:03,275][33484] Avg episode reward: [(0, '4.549')] +[2023-06-20 23:26:08,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1194.1). Total num frames: 643072. Throughput: 0: 279.8. Samples: 161876. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:08,273][33484] Avg episode reward: [(0, '4.681')] +[2023-06-20 23:26:13,269][33484] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1180.2). Total num frames: 647168. Throughput: 0: 279.9. Samples: 163558. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:13,272][33484] Avg episode reward: [(0, '4.631')] +[2023-06-20 23:26:17,979][33879] Updated weights for policy 0, policy_version 160 (0.0009) +[2023-06-20 23:26:18,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1194.1). Total num frames: 655360. Throughput: 0: 280.1. Samples: 164400. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:18,273][33484] Avg episode reward: [(0, '4.616')] +[2023-06-20 23:26:23,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1092.2, 300 sec: 1180.2). Total num frames: 659456. Throughput: 0: 280.2. Samples: 166084. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:23,273][33484] Avg episode reward: [(0, '4.504')] +[2023-06-20 23:26:28,270][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1180.2). Total num frames: 663552. Throughput: 0: 280.5. Samples: 167774. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:28,273][33484] Avg episode reward: [(0, '4.496')] +[2023-06-20 23:26:33,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1180.2). Total num frames: 671744. Throughput: 0: 280.3. Samples: 168612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:33,272][33484] Avg episode reward: [(0, '4.587')] +[2023-06-20 23:26:38,272][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.3, 300 sec: 1180.2). Total num frames: 675840. Throughput: 0: 280.4. Samples: 170294. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:38,274][33484] Avg episode reward: [(0, '4.564')] +[2023-06-20 23:26:43,272][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1166.3). Total num frames: 679936. Throughput: 0: 280.5. Samples: 171974. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:43,273][33484] Avg episode reward: [(0, '4.528')] +[2023-06-20 23:26:48,266][33484] Fps is (10 sec: 1229.5, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 688128. Throughput: 0: 280.4. Samples: 172814. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:48,267][33484] Avg episode reward: [(0, '4.590')] +[2023-06-20 23:26:53,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1166.3). Total num frames: 692224. Throughput: 0: 280.6. Samples: 174502. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:53,276][33484] Avg episode reward: [(0, '4.544')] +[2023-06-20 23:26:54,342][33879] Updated weights for policy 0, policy_version 170 (0.0011) +[2023-06-20 23:26:58,271][33484] Fps is (10 sec: 1228.2, 60 sec: 1160.5, 300 sec: 1180.2). Total num frames: 700416. Throughput: 0: 280.4. Samples: 176176. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:26:58,272][33484] Avg episode reward: [(0, '4.609')] +[2023-06-20 23:27:03,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1166.3). Total num frames: 704512. Throughput: 0: 280.4. Samples: 177016. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:03,273][33484] Avg episode reward: [(0, '4.640')] +[2023-06-20 23:27:03,276][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000172_704512.pth... +[2023-06-20 23:27:03,362][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000105_430080.pth +[2023-06-20 23:27:08,272][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1166.3). Total num frames: 708608. Throughput: 0: 280.4. Samples: 178704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:08,274][33484] Avg episode reward: [(0, '4.530')] +[2023-06-20 23:27:13,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1166.3). Total num frames: 716800. Throughput: 0: 280.2. Samples: 180384. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:13,273][33484] Avg episode reward: [(0, '4.574')] +[2023-06-20 23:27:18,274][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.2, 300 sec: 1166.3). Total num frames: 720896. Throughput: 0: 280.2. Samples: 181220. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:18,279][33484] Avg episode reward: [(0, '4.525')] +[2023-06-20 23:27:23,271][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1152.4). Total num frames: 724992. Throughput: 0: 280.4. Samples: 182910. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:23,274][33484] Avg episode reward: [(0, '4.490')] +[2023-06-20 23:27:28,272][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1152.4). Total num frames: 733184. Throughput: 0: 280.3. Samples: 184588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:28,272][33484] Avg episode reward: [(0, '4.516')] +[2023-06-20 23:27:30,871][33879] Updated weights for policy 0, policy_version 180 (0.0009) +[2023-06-20 23:27:33,264][33484] Fps is (10 sec: 1229.7, 60 sec: 1092.4, 300 sec: 1152.5). Total num frames: 737280. Throughput: 0: 280.2. Samples: 185424. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:33,265][33484] Avg episode reward: [(0, '4.461')] +[2023-06-20 23:27:38,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1152.4). Total num frames: 745472. Throughput: 0: 280.0. Samples: 187100. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:38,275][33484] Avg episode reward: [(0, '4.452')] +[2023-06-20 23:27:43,270][33484] Fps is (10 sec: 1228.1, 60 sec: 1160.6, 300 sec: 1152.4). Total num frames: 749568. Throughput: 0: 279.7. Samples: 188764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:43,272][33484] Avg episode reward: [(0, '4.611')] +[2023-06-20 23:27:48,271][33484] Fps is (10 sec: 819.4, 60 sec: 1092.2, 300 sec: 1138.5). Total num frames: 753664. Throughput: 0: 279.9. Samples: 189610. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:48,274][33484] Avg episode reward: [(0, '4.650')] +[2023-06-20 23:27:53,272][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.6, 300 sec: 1152.4). Total num frames: 761856. Throughput: 0: 279.7. Samples: 191292. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:53,274][33484] Avg episode reward: [(0, '4.705')] +[2023-06-20 23:27:58,268][33484] Fps is (10 sec: 1229.1, 60 sec: 1092.3, 300 sec: 1138.6). Total num frames: 765952. Throughput: 0: 280.0. Samples: 192984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:27:58,270][33484] Avg episode reward: [(0, '4.754')] +[2023-06-20 23:28:00,090][33878] Saving new best policy, reward=4.754! +[2023-06-20 23:28:03,268][33484] Fps is (10 sec: 819.5, 60 sec: 1092.3, 300 sec: 1138.6). Total num frames: 770048. Throughput: 0: 279.9. Samples: 193814. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:03,269][33484] Avg episode reward: [(0, '4.821')] +[2023-06-20 23:28:03,763][33878] Saving new best policy, reward=4.821! +[2023-06-20 23:28:07,397][33879] Updated weights for policy 0, policy_version 190 (0.0011) +[2023-06-20 23:28:08,270][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.6, 300 sec: 1138.5). Total num frames: 778240. Throughput: 0: 279.6. Samples: 195490. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:08,271][33484] Avg episode reward: [(0, '4.841')] +[2023-06-20 23:28:08,272][33878] Saving new best policy, reward=4.841! 
+[2023-06-20 23:28:13,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 782336. Throughput: 0: 279.6. Samples: 197170. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:13,270][33484] Avg episode reward: [(0, '4.813')] +[2023-06-20 23:28:18,272][33484] Fps is (10 sec: 819.0, 60 sec: 1092.3, 300 sec: 1124.6). Total num frames: 786432. Throughput: 0: 279.8. Samples: 198016. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:18,275][33484] Avg episode reward: [(0, '4.881')] +[2023-06-20 23:28:18,395][33878] Saving new best policy, reward=4.881! +[2023-06-20 23:28:23,273][33484] Fps is (10 sec: 1228.4, 60 sec: 1160.5, 300 sec: 1124.6). Total num frames: 794624. Throughput: 0: 279.9. Samples: 199696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:23,274][33484] Avg episode reward: [(0, '4.895')] +[2023-06-20 23:28:23,277][33878] Saving new best policy, reward=4.895! +[2023-06-20 23:28:28,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 798720. Throughput: 0: 280.1. Samples: 201368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:28,274][33484] Avg episode reward: [(0, '4.836')] +[2023-06-20 23:28:33,271][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.4, 300 sec: 1124.7). Total num frames: 806912. Throughput: 0: 279.7. Samples: 202196. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:33,273][33484] Avg episode reward: [(0, '4.918')] +[2023-06-20 23:28:33,275][33878] Saving new best policy, reward=4.918! +[2023-06-20 23:28:38,270][33484] Fps is (10 sec: 1229.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 811008. Throughput: 0: 279.7. Samples: 203880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:38,272][33484] Avg episode reward: [(0, '4.904')] +[2023-06-20 23:28:43,274][33484] Fps is (10 sec: 819.0, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 815104. Throughput: 0: 279.6. Samples: 205568. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:43,275][33484] Avg episode reward: [(0, '5.109')] +[2023-06-20 23:28:44,179][33878] Saving new best policy, reward=5.109! +[2023-06-20 23:28:44,184][33879] Updated weights for policy 0, policy_version 200 (0.0008) +[2023-06-20 23:28:48,273][33484] Fps is (10 sec: 1228.4, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 823296. Throughput: 0: 280.0. Samples: 206416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:48,276][33484] Avg episode reward: [(0, '5.079')] +[2023-06-20 23:28:53,271][33484] Fps is (10 sec: 1229.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 827392. Throughput: 0: 280.0. Samples: 208092. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:53,272][33484] Avg episode reward: [(0, '5.114')] +[2023-06-20 23:28:55,238][33878] Saving new best policy, reward=5.114! +[2023-06-20 23:28:58,272][33484] Fps is (10 sec: 819.4, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 831488. Throughput: 0: 280.1. Samples: 209774. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:28:58,275][33484] Avg episode reward: [(0, '5.105')] +[2023-06-20 23:29:03,271][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 839680. Throughput: 0: 280.0. Samples: 210616. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:03,274][33484] Avg episode reward: [(0, '4.977')] +[2023-06-20 23:29:03,276][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000205_839680.pth... +[2023-06-20 23:29:03,358][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000140_573440.pth +[2023-06-20 23:29:08,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 843776. Throughput: 0: 280.1. Samples: 212300. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:08,274][33484] Avg episode reward: [(0, '4.964')] +[2023-06-20 23:29:13,271][33484] Fps is (10 sec: 819.2, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 847872. Throughput: 0: 280.5. Samples: 213990. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:13,273][33484] Avg episode reward: [(0, '4.997')] +[2023-06-20 23:29:18,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 856064. Throughput: 0: 280.9. Samples: 214838. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:18,274][33484] Avg episode reward: [(0, '5.142')] +[2023-06-20 23:29:18,275][33878] Saving new best policy, reward=5.142! +[2023-06-20 23:29:20,647][33879] Updated weights for policy 0, policy_version 210 (0.0009) +[2023-06-20 23:29:23,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 860160. Throughput: 0: 280.9. Samples: 216520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:23,273][33484] Avg episode reward: [(0, '4.928')] +[2023-06-20 23:29:28,271][33484] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 868352. Throughput: 0: 280.7. Samples: 218198. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:28,273][33484] Avg episode reward: [(0, '4.980')] +[2023-06-20 23:29:33,268][33484] Fps is (10 sec: 1229.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 872448. Throughput: 0: 280.6. Samples: 219042. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:33,268][33484] Avg episode reward: [(0, '4.984')] +[2023-06-20 23:29:38,270][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 876544. Throughput: 0: 280.8. Samples: 220730. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:38,273][33484] Avg episode reward: [(0, '5.105')] +[2023-06-20 23:29:43,267][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 884736. Throughput: 0: 280.8. Samples: 222408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:43,268][33484] Avg episode reward: [(0, '5.089')] +[2023-06-20 23:29:48,269][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 888832. Throughput: 0: 280.6. Samples: 223242. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:48,270][33484] Avg episode reward: [(0, '4.983')] +[2023-06-20 23:29:53,272][33484] Fps is (10 sec: 818.8, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 892928. Throughput: 0: 280.6. Samples: 224926. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:53,274][33484] Avg episode reward: [(0, '4.934')] +[2023-06-20 23:29:57,217][33879] Updated weights for policy 0, policy_version 220 (0.0010) +[2023-06-20 23:29:58,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 901120. Throughput: 0: 280.3. 
Samples: 226604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:29:58,272][33484] Avg episode reward: [(0, '4.804')] +[2023-06-20 23:30:03,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 905216. Throughput: 0: 280.0. Samples: 227436. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:03,277][33484] Avg episode reward: [(0, '4.820')] +[2023-06-20 23:30:08,270][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 913408. Throughput: 0: 280.1. Samples: 229124. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:08,271][33484] Avg episode reward: [(0, '4.781')] +[2023-06-20 23:30:13,270][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 917504. Throughput: 0: 280.2. Samples: 230806. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:13,272][33484] Avg episode reward: [(0, '4.872')] +[2023-06-20 23:30:18,272][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 921600. Throughput: 0: 279.9. Samples: 231638. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:18,274][33484] Avg episode reward: [(0, '4.931')] +[2023-06-20 23:30:23,264][33484] Fps is (10 sec: 1229.6, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 929792. Throughput: 0: 280.1. Samples: 233334. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:23,267][33484] Avg episode reward: [(0, '4.913')] +[2023-06-20 23:30:28,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 933888. Throughput: 0: 280.0. Samples: 235008. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:30:28,274][33484] Avg episode reward: [(0, '4.873')] +[2023-06-20 23:30:33,273][33484] Fps is (10 sec: 818.5, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 937984. Throughput: 0: 280.1. Samples: 235848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:33,276][33484] Avg episode reward: [(0, '4.935')] +[2023-06-20 23:30:33,790][33879] Updated weights for policy 0, policy_version 230 (0.0008) +[2023-06-20 23:30:38,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 946176. Throughput: 0: 280.2. Samples: 237534. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:30:38,274][33484] Avg episode reward: [(0, '4.994')] +[2023-06-20 23:30:43,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 950272. Throughput: 0: 280.1. Samples: 239210. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:43,274][33484] Avg episode reward: [(0, '5.068')] +[2023-06-20 23:30:48,272][33484] Fps is (10 sec: 819.3, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 954368. Throughput: 0: 280.5. Samples: 240058. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:30:48,274][33484] Avg episode reward: [(0, '5.042')] +[2023-06-20 23:30:53,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 962560. Throughput: 0: 280.3. Samples: 241738. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:30:53,275][33484] Avg episode reward: [(0, '4.960')] +[2023-06-20 23:30:58,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 966656. Throughput: 0: 280.3. Samples: 243418. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:30:58,273][33484] Avg episode reward: [(0, '4.969')] +[2023-06-20 23:31:03,271][33484] Fps is (10 sec: 1229.1, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 974848. Throughput: 0: 280.6. Samples: 244266. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:03,273][33484] Avg episode reward: [(0, '5.064')] +[2023-06-20 23:31:03,276][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000238_974848.pth... +[2023-06-20 23:31:03,350][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000172_704512.pth +[2023-06-20 23:31:08,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 978944. Throughput: 0: 280.3. Samples: 245948. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-06-20 23:31:08,273][33484] Avg episode reward: [(0, '5.141')] +[2023-06-20 23:31:10,243][33879] Updated weights for policy 0, policy_version 240 (0.0010) +[2023-06-20 23:31:13,272][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 983040. Throughput: 0: 280.4. Samples: 247628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:13,274][33484] Avg episode reward: [(0, '5.015')] +[2023-06-20 23:31:18,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 991232. Throughput: 0: 280.5. Samples: 248470. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:18,273][33484] Avg episode reward: [(0, '4.962')] +[2023-06-20 23:31:23,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 995328. Throughput: 0: 280.5. Samples: 250154. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:23,273][33484] Avg episode reward: [(0, '4.883')] +[2023-06-20 23:31:28,272][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 999424. Throughput: 0: 280.7. Samples: 251842. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:28,274][33484] Avg episode reward: [(0, '5.030')] +[2023-06-20 23:31:33,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1007616. Throughput: 0: 280.4. Samples: 252676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:33,271][33484] Avg episode reward: [(0, '5.073')] +[2023-06-20 23:31:38,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1011712. Throughput: 0: 280.6. Samples: 254366. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:38,274][33484] Avg episode reward: [(0, '5.085')] +[2023-06-20 23:31:43,270][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1124.6). Total num frames: 1019904. Throughput: 0: 280.4. Samples: 256034. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:43,272][33484] Avg episode reward: [(0, '5.163')] +[2023-06-20 23:31:43,274][33878] Saving new best policy, reward=5.163! +[2023-06-20 23:31:46,702][33879] Updated weights for policy 0, policy_version 250 (0.0009) +[2023-06-20 23:31:48,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1024000. Throughput: 0: 280.3. Samples: 256880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:48,274][33484] Avg episode reward: [(0, '5.079')] +[2023-06-20 23:31:53,272][33484] Fps is (10 sec: 819.0, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1028096. Throughput: 0: 280.2. 
Samples: 258556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:53,274][33484] Avg episode reward: [(0, '5.111')] +[2023-06-20 23:31:58,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1036288. Throughput: 0: 280.5. Samples: 260250. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:31:58,274][33484] Avg episode reward: [(0, '5.255')] +[2023-06-20 23:31:58,275][33878] Saving new best policy, reward=5.255! +[2023-06-20 23:32:03,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1040384. Throughput: 0: 280.4. Samples: 261088. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:03,274][33484] Avg episode reward: [(0, '5.293')] +[2023-06-20 23:32:05,202][33878] Saving new best policy, reward=5.293! +[2023-06-20 23:32:08,271][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1044480. Throughput: 0: 280.2. Samples: 262762. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:08,272][33484] Avg episode reward: [(0, '5.113')] +[2023-06-20 23:32:13,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1052672. Throughput: 0: 279.9. Samples: 264436. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:13,274][33484] Avg episode reward: [(0, '4.982')] +[2023-06-20 23:32:18,273][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1056768. Throughput: 0: 280.1. Samples: 265282. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:18,274][33484] Avg episode reward: [(0, '4.984')] +[2023-06-20 23:32:23,264][33879] Updated weights for policy 0, policy_version 260 (0.0010) +[2023-06-20 23:32:23,264][33484] Fps is (10 sec: 1229.7, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 1064960. Throughput: 0: 279.9. Samples: 266960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:23,267][33484] Avg episode reward: [(0, '4.921')] +[2023-06-20 23:32:28,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.6). Total num frames: 1069056. Throughput: 0: 280.3. Samples: 268648. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:28,272][33484] Avg episode reward: [(0, '5.072')] +[2023-06-20 23:32:33,272][33484] Fps is (10 sec: 818.6, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1073152. Throughput: 0: 280.1. Samples: 269486. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:33,274][33484] Avg episode reward: [(0, '5.101')] +[2023-06-20 23:32:38,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1081344. Throughput: 0: 280.2. Samples: 271166. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:38,273][33484] Avg episode reward: [(0, '5.126')] +[2023-06-20 23:32:43,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1085440. Throughput: 0: 279.8. Samples: 272840. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:43,275][33484] Avg episode reward: [(0, '5.115')] +[2023-06-20 23:32:48,274][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1089536. Throughput: 0: 279.8. Samples: 273680. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:48,277][33484] Avg episode reward: [(0, '5.222')] +[2023-06-20 23:32:53,265][33484] Fps is (10 sec: 1229.7, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 1097728. Throughput: 0: 280.2. Samples: 275368. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:53,267][33484] Avg episode reward: [(0, '5.425')] +[2023-06-20 23:32:53,269][33878] Saving new best policy, reward=5.425! +[2023-06-20 23:32:58,272][33484] Fps is (10 sec: 1229.0, 60 sec: 1092.3, 300 sec: 1124.6). Total num frames: 1101824. Throughput: 0: 280.4. Samples: 277054. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:32:58,273][33484] Avg episode reward: [(0, '5.421')] +[2023-06-20 23:32:59,923][33879] Updated weights for policy 0, policy_version 270 (0.0011) +[2023-06-20 23:33:03,272][33484] Fps is (10 sec: 818.7, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1105920. Throughput: 0: 280.2. Samples: 277892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:03,275][33484] Avg episode reward: [(0, '5.536')] +[2023-06-20 23:33:03,524][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000271_1110016.pth... +[2023-06-20 23:33:03,589][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000205_839680.pth +[2023-06-20 23:33:03,598][33878] Saving new best policy, reward=5.536! +[2023-06-20 23:33:08,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1114112. Throughput: 0: 280.0. Samples: 279562. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:08,274][33484] Avg episode reward: [(0, '5.408')] +[2023-06-20 23:33:13,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1118208. Throughput: 0: 279.7. Samples: 281236. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:13,273][33484] Avg episode reward: [(0, '5.367')] +[2023-06-20 23:33:18,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1126400. Throughput: 0: 280.0. Samples: 282088. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:18,275][33484] Avg episode reward: [(0, '5.392')] +[2023-06-20 23:33:23,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1130496. Throughput: 0: 280.1. Samples: 283772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:23,272][33484] Avg episode reward: [(0, '5.306')] +[2023-06-20 23:33:28,267][33484] Fps is (10 sec: 819.7, 60 sec: 1092.4, 300 sec: 1110.8). Total num frames: 1134592. Throughput: 0: 280.3. Samples: 285450. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:28,271][33484] Avg episode reward: [(0, '5.201')] +[2023-06-20 23:33:33,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1142784. Throughput: 0: 280.4. Samples: 286296. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:33,274][33484] Avg episode reward: [(0, '5.220')] +[2023-06-20 23:33:36,266][33879] Updated weights for policy 0, policy_version 280 (0.0012) +[2023-06-20 23:33:38,270][33484] Fps is (10 sec: 1228.4, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1146880. Throughput: 0: 279.9. Samples: 287966. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:38,271][33484] Avg episode reward: [(0, '5.245')] +[2023-06-20 23:33:43,273][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1150976. Throughput: 0: 279.7. Samples: 289642. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:43,274][33484] Avg episode reward: [(0, '5.216')] +[2023-06-20 23:33:48,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1159168. Throughput: 0: 279.8. Samples: 290482. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:48,274][33484] Avg episode reward: [(0, '5.225')] +[2023-06-20 23:33:53,273][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1163264. Throughput: 0: 280.1. Samples: 292166. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:53,276][33484] Avg episode reward: [(0, '5.280')] +[2023-06-20 23:33:58,265][33484] Fps is (10 sec: 1229.7, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 1171456. Throughput: 0: 280.1. Samples: 293840. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:33:58,267][33484] Avg episode reward: [(0, '5.372')] +[2023-06-20 23:34:03,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1175552. Throughput: 0: 279.7. Samples: 294672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:03,272][33484] Avg episode reward: [(0, '5.440')] +[2023-06-20 23:34:08,272][33484] Fps is (10 sec: 818.6, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1179648. Throughput: 0: 279.8. Samples: 296364. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:08,274][33484] Avg episode reward: [(0, '5.553')] +[2023-06-20 23:34:09,205][33878] Saving new best policy, reward=5.553! +[2023-06-20 23:34:12,837][33879] Updated weights for policy 0, policy_version 290 (0.0010) +[2023-06-20 23:34:13,267][33484] Fps is (10 sec: 1229.3, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1187840. Throughput: 0: 279.8. Samples: 298040. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:13,269][33484] Avg episode reward: [(0, '5.672')] +[2023-06-20 23:34:13,272][33878] Saving new best policy, reward=5.672! +[2023-06-20 23:34:18,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1191936. Throughput: 0: 279.6. Samples: 298880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:18,274][33484] Avg episode reward: [(0, '5.712')] +[2023-06-20 23:34:20,196][33878] Saving new best policy, reward=5.712! +[2023-06-20 23:34:23,271][33484] Fps is (10 sec: 818.9, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1196032. Throughput: 0: 280.0. Samples: 300566. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:23,273][33484] Avg episode reward: [(0, '5.767')] +[2023-06-20 23:34:23,769][33878] Saving new best policy, reward=5.767! +[2023-06-20 23:34:28,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.4, 300 sec: 1124.6). Total num frames: 1204224. Throughput: 0: 280.3. Samples: 302254. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:28,274][33484] Avg episode reward: [(0, '5.870')] +[2023-06-20 23:34:28,275][33878] Saving new best policy, reward=5.870! +[2023-06-20 23:34:33,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1208320. Throughput: 0: 280.5. Samples: 303104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:33,273][33484] Avg episode reward: [(0, '5.877')] +[2023-06-20 23:34:34,685][33878] Saving new best policy, reward=5.877! +[2023-06-20 23:34:38,271][33484] Fps is (10 sec: 819.3, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1212416. Throughput: 0: 280.5. Samples: 304788. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:38,272][33484] Avg episode reward: [(0, '5.916')] +[2023-06-20 23:34:38,341][33878] Saving new best policy, reward=5.916! +[2023-06-20 23:34:43,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1220608. Throughput: 0: 280.4. Samples: 306462. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:43,274][33484] Avg episode reward: [(0, '5.999')] +[2023-06-20 23:34:43,276][33878] Saving new best policy, reward=5.999! +[2023-06-20 23:34:48,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1224704. Throughput: 0: 280.7. Samples: 307304. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:34:48,274][33484] Avg episode reward: [(0, '5.743')] +[2023-06-20 23:34:49,332][33879] Updated weights for policy 0, policy_version 300 (0.0015) +[2023-06-20 23:34:53,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1232896. Throughput: 0: 280.4. Samples: 308984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:53,274][33484] Avg episode reward: [(0, '5.787')] +[2023-06-20 23:34:58,271][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1236992. Throughput: 0: 280.6. Samples: 310670. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:34:58,273][33484] Avg episode reward: [(0, '5.759')] +[2023-06-20 23:35:03,272][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1241088. Throughput: 0: 280.4. Samples: 311500. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:35:03,275][33484] Avg episode reward: [(0, '5.611')] +[2023-06-20 23:35:03,916][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000304_1245184.pth... +[2023-06-20 23:35:03,981][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000238_974848.pth +[2023-06-20 23:35:08,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1249280. Throughput: 0: 280.4. Samples: 313184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:35:08,273][33484] Avg episode reward: [(0, '5.604')] +[2023-06-20 23:35:13,268][33484] Fps is (10 sec: 1229.3, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1253376. Throughput: 0: 280.2. Samples: 314862. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:35:13,270][33484] Avg episode reward: [(0, '5.426')] +[2023-06-20 23:35:18,271][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1257472. Throughput: 0: 280.1. Samples: 315710. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:18,273][33484] Avg episode reward: [(0, '5.566')] +[2023-06-20 23:35:23,273][33484] Fps is (10 sec: 1228.3, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1265664. Throughput: 0: 280.1. Samples: 317392. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:23,274][33484] Avg episode reward: [(0, '5.583')] +[2023-06-20 23:35:25,768][33879] Updated weights for policy 0, policy_version 310 (0.0010) +[2023-06-20 23:35:28,273][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1269760. Throughput: 0: 280.3. Samples: 319074. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:28,274][33484] Avg episode reward: [(0, '5.808')] +[2023-06-20 23:35:33,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1277952. Throughput: 0: 280.3. Samples: 319916. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:33,274][33484] Avg episode reward: [(0, '5.861')] +[2023-06-20 23:35:38,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1282048. Throughput: 0: 280.5. Samples: 321606. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:38,273][33484] Avg episode reward: [(0, '5.862')] +[2023-06-20 23:35:43,264][33484] Fps is (10 sec: 819.8, 60 sec: 1092.4, 300 sec: 1124.7). Total num frames: 1286144. Throughput: 0: 280.3. Samples: 323282. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:43,268][33484] Avg episode reward: [(0, '6.192')] +[2023-06-20 23:35:44,071][33878] Saving new best policy, reward=6.192! +[2023-06-20 23:35:48,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1294336. Throughput: 0: 280.6. Samples: 324126. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:48,274][33484] Avg episode reward: [(0, '6.267')] +[2023-06-20 23:35:48,277][33878] Saving new best policy, reward=6.267! +[2023-06-20 23:35:53,270][33484] Fps is (10 sec: 1228.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1298432. Throughput: 0: 280.5. Samples: 325808. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:53,271][33484] Avg episode reward: [(0, '6.544')] +[2023-06-20 23:35:55,042][33878] Saving new best policy, reward=6.544! +[2023-06-20 23:35:58,271][33484] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1302528. Throughput: 0: 280.5. Samples: 327486. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:35:58,272][33484] Avg episode reward: [(0, '6.672')] +[2023-06-20 23:35:58,691][33878] Saving new best policy, reward=6.672! +[2023-06-20 23:36:02,268][33879] Updated weights for policy 0, policy_version 320 (0.0009) +[2023-06-20 23:36:03,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1310720. Throughput: 0: 280.3. Samples: 328322. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:03,273][33484] Avg episode reward: [(0, '6.585')] +[2023-06-20 23:36:08,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1314816. Throughput: 0: 280.2. Samples: 330002. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:08,275][33484] Avg episode reward: [(0, '6.521')] +[2023-06-20 23:36:13,268][33484] Fps is (10 sec: 819.5, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1318912. Throughput: 0: 280.2. Samples: 331680. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:13,270][33484] Avg episode reward: [(0, '6.406')] +[2023-06-20 23:36:18,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1327104. Throughput: 0: 280.1. Samples: 332522. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:18,274][33484] Avg episode reward: [(0, '6.371')] +[2023-06-20 23:36:23,273][33484] Fps is (10 sec: 1228.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1331200. Throughput: 0: 279.9. Samples: 334204. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:23,274][33484] Avg episode reward: [(0, '6.284')] +[2023-06-20 23:36:28,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1339392. Throughput: 0: 280.2. Samples: 335894. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:28,275][33484] Avg episode reward: [(0, '6.216')] +[2023-06-20 23:36:33,271][33484] Fps is (10 sec: 1229.0, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1343488. Throughput: 0: 280.1. Samples: 336732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:36:33,272][33484] Avg episode reward: [(0, '5.875')] +[2023-06-20 23:36:38,266][33484] Fps is (10 sec: 819.8, 60 sec: 1092.4, 300 sec: 1110.8). Total num frames: 1347584. Throughput: 0: 280.2. Samples: 338416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:36:38,268][33484] Avg episode reward: [(0, '5.756')] +[2023-06-20 23:36:38,815][33879] Updated weights for policy 0, policy_version 330 (0.0010) +[2023-06-20 23:36:43,272][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.4, 300 sec: 1124.7). Total num frames: 1355776. Throughput: 0: 280.2. Samples: 340096. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:36:43,275][33484] Avg episode reward: [(0, '5.760')] +[2023-06-20 23:36:48,272][33484] Fps is (10 sec: 1228.0, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1359872. Throughput: 0: 280.4. Samples: 340940. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:48,274][33484] Avg episode reward: [(0, '5.719')] +[2023-06-20 23:36:53,270][33484] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1110.8). Total num frames: 1363968. Throughput: 0: 280.9. Samples: 342640. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:53,273][33484] Avg episode reward: [(0, '5.782')] +[2023-06-20 23:36:58,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1372160. Throughput: 0: 280.7. Samples: 344314. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:36:58,274][33484] Avg episode reward: [(0, '5.820')] +[2023-06-20 23:37:03,273][33484] Fps is (10 sec: 1228.4, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1376256. Throughput: 0: 280.6. Samples: 345150. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:03,275][33484] Avg episode reward: [(0, '5.908')] +[2023-06-20 23:37:04,279][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000337_1380352.pth... +[2023-06-20 23:37:04,345][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000271_1110016.pth +[2023-06-20 23:37:08,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1384448. Throughput: 0: 280.8. Samples: 346840. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:08,274][33484] Avg episode reward: [(0, '5.773')] +[2023-06-20 23:37:13,270][33484] Fps is (10 sec: 1229.1, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1388544. Throughput: 0: 280.7. Samples: 348524. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:13,271][33484] Avg episode reward: [(0, '5.844')] +[2023-06-20 23:37:15,221][33879] Updated weights for policy 0, policy_version 340 (0.0009) +[2023-06-20 23:37:18,273][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.7). Total num frames: 1392640. Throughput: 0: 280.8. Samples: 349368. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:18,275][33484] Avg episode reward: [(0, '5.962')] +[2023-06-20 23:37:23,271][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1400832. Throughput: 0: 280.9. Samples: 351056. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:23,274][33484] Avg episode reward: [(0, '6.198')] +[2023-06-20 23:37:28,273][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1404928. Throughput: 0: 281.2. Samples: 352748. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:28,274][33484] Avg episode reward: [(0, '6.376')] +[2023-06-20 23:37:33,272][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1409024. Throughput: 0: 281.2. Samples: 353592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:33,275][33484] Avg episode reward: [(0, '6.413')] +[2023-06-20 23:37:38,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.4, 300 sec: 1124.7). Total num frames: 1417216. Throughput: 0: 280.7. Samples: 355274. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:38,273][33484] Avg episode reward: [(0, '6.470')] +[2023-06-20 23:37:43,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1421312. Throughput: 0: 280.8. Samples: 356950. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:43,275][33484] Avg episode reward: [(0, '6.445')] +[2023-06-20 23:37:48,271][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.6, 300 sec: 1124.6). Total num frames: 1429504. Throughput: 0: 280.4. Samples: 357768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:48,273][33484] Avg episode reward: [(0, '6.396')] +[2023-06-20 23:37:51,801][33879] Updated weights for policy 0, policy_version 350 (0.0008) +[2023-06-20 23:37:53,273][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1433600. Throughput: 0: 280.4. Samples: 359460. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:53,274][33484] Avg episode reward: [(0, '6.435')] +[2023-06-20 23:37:58,269][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1437696. Throughput: 0: 280.3. Samples: 361138. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:37:58,273][33484] Avg episode reward: [(0, '6.545')] +[2023-06-20 23:38:03,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1445888. Throughput: 0: 280.4. Samples: 361986. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:03,276][33484] Avg episode reward: [(0, '6.314')] +[2023-06-20 23:38:08,273][33484] Fps is (10 sec: 1228.4, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1449984. Throughput: 0: 280.2. Samples: 363664. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:08,274][33484] Avg episode reward: [(0, '6.186')] +[2023-06-20 23:38:13,273][33484] Fps is (10 sec: 819.2, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1454080. Throughput: 0: 280.2. Samples: 365356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:13,276][33484] Avg episode reward: [(0, '6.264')] +[2023-06-20 23:38:18,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1462272. Throughput: 0: 280.2. Samples: 366200. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:18,274][33484] Avg episode reward: [(0, '5.814')] +[2023-06-20 23:38:23,271][33484] Fps is (10 sec: 1229.0, 60 sec: 1092.3, 300 sec: 1124.6). Total num frames: 1466368. Throughput: 0: 280.2. Samples: 367884. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:23,273][33484] Avg episode reward: [(0, '6.019')] +[2023-06-20 23:38:28,157][33879] Updated weights for policy 0, policy_version 360 (0.0008) +[2023-06-20 23:38:28,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1474560. Throughput: 0: 280.5. Samples: 369574. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:28,274][33484] Avg episode reward: [(0, '6.075')] +[2023-06-20 23:38:33,273][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1478656. Throughput: 0: 281.0. Samples: 370412. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:33,273][33484] Avg episode reward: [(0, '6.112')] +[2023-06-20 23:38:38,273][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1482752. Throughput: 0: 281.0. Samples: 372106. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:38,275][33484] Avg episode reward: [(0, '6.124')] +[2023-06-20 23:38:43,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1490944. Throughput: 0: 281.0. Samples: 373784. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:43,273][33484] Avg episode reward: [(0, '6.079')] +[2023-06-20 23:38:48,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1495040. Throughput: 0: 280.8. Samples: 374620. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:48,276][33484] Avg episode reward: [(0, '6.133')] +[2023-06-20 23:38:53,274][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1110.7). Total num frames: 1499136. Throughput: 0: 280.8. Samples: 376302. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:53,276][33484] Avg episode reward: [(0, '6.295')] +[2023-06-20 23:38:58,264][33484] Fps is (10 sec: 1229.9, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1507328. Throughput: 0: 280.7. Samples: 377984. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:38:58,265][33484] Avg episode reward: [(0, '6.317')] +[2023-06-20 23:39:03,271][33484] Fps is (10 sec: 1229.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1511424. Throughput: 0: 280.5. Samples: 378820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:39:03,272][33484] Avg episode reward: [(0, '6.408')] +[2023-06-20 23:39:04,427][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000370_1515520.pth... +[2023-06-20 23:39:04,438][33879] Updated weights for policy 0, policy_version 370 (0.0008) +[2023-06-20 23:39:04,486][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000304_1245184.pth +[2023-06-20 23:39:08,271][33484] Fps is (10 sec: 1227.9, 60 sec: 1160.6, 300 sec: 1124.6). Total num frames: 1519616. Throughput: 0: 280.5. Samples: 380506. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:08,272][33484] Avg episode reward: [(0, '6.324')] +[2023-06-20 23:39:13,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1523712. Throughput: 0: 280.2. Samples: 382182. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:13,275][33484] Avg episode reward: [(0, '6.268')] +[2023-06-20 23:39:18,272][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1527808. Throughput: 0: 280.3. Samples: 383026. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:18,273][33484] Avg episode reward: [(0, '6.306')] +[2023-06-20 23:39:23,266][33484] Fps is (10 sec: 1229.5, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1536000. Throughput: 0: 280.2. Samples: 384714. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:23,268][33484] Avg episode reward: [(0, '6.398')] +[2023-06-20 23:39:28,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1540096. Throughput: 0: 280.1. Samples: 386388. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:28,273][33484] Avg episode reward: [(0, '6.488')] +[2023-06-20 23:39:33,272][33484] Fps is (10 sec: 818.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1544192. Throughput: 0: 280.1. Samples: 387226. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:33,275][33484] Avg episode reward: [(0, '6.411')] +[2023-06-20 23:39:38,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1552384. Throughput: 0: 279.9. Samples: 388898. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:38,273][33484] Avg episode reward: [(0, '6.310')] +[2023-06-20 23:39:41,036][33879] Updated weights for policy 0, policy_version 380 (0.0009) +[2023-06-20 23:39:43,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1556480. Throughput: 0: 280.0. Samples: 390584. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:39:43,273][33484] Avg episode reward: [(0, '6.511')] +[2023-06-20 23:39:48,274][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1560576. Throughput: 0: 280.1. Samples: 391426. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:39:48,277][33484] Avg episode reward: [(0, '6.571')] +[2023-06-20 23:39:53,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1568768. Throughput: 0: 280.0. Samples: 393106. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:39:53,273][33484] Avg episode reward: [(0, '6.490')] +[2023-06-20 23:39:58,272][33484] Fps is (10 sec: 1229.1, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1572864. Throughput: 0: 280.2. Samples: 394792. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:39:58,273][33484] Avg episode reward: [(0, '6.532')] +[2023-06-20 23:40:03,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1581056. Throughput: 0: 280.3. Samples: 395640. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:40:03,274][33484] Avg episode reward: [(0, '6.731')] +[2023-06-20 23:40:03,277][33878] Saving new best policy, reward=6.731! +[2023-06-20 23:40:08,271][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1585152. Throughput: 0: 280.3. Samples: 397328. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:40:08,273][33484] Avg episode reward: [(0, '6.548')] +[2023-06-20 23:40:13,273][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1589248. Throughput: 0: 280.1. Samples: 398994. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:40:13,274][33484] Avg episode reward: [(0, '6.595')] +[2023-06-20 23:40:17,615][33879] Updated weights for policy 0, policy_version 390 (0.0009) +[2023-06-20 23:40:18,273][33484] Fps is (10 sec: 1228.6, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1597440. Throughput: 0: 280.2. Samples: 399834. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:18,274][33484] Avg episode reward: [(0, '6.642')] +[2023-06-20 23:40:23,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1601536. Throughput: 0: 280.5. Samples: 401520. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:23,275][33484] Avg episode reward: [(0, '6.800')] +[2023-06-20 23:40:25,173][33878] Saving new best policy, reward=6.800! +[2023-06-20 23:40:28,274][33484] Fps is (10 sec: 819.1, 60 sec: 1092.2, 300 sec: 1110.8). Total num frames: 1605632. Throughput: 0: 280.5. Samples: 403206. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:40:28,275][33484] Avg episode reward: [(0, '6.837')] +[2023-06-20 23:40:28,613][33878] Saving new best policy, reward=6.837! +[2023-06-20 23:40:33,271][33484] Fps is (10 sec: 1229.2, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1613824. Throughput: 0: 280.8. Samples: 404060. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:33,271][33484] Avg episode reward: [(0, '6.694')] +[2023-06-20 23:40:38,274][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 1617920. Throughput: 0: 280.9. Samples: 405748. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:38,277][33484] Avg episode reward: [(0, '6.752')] +[2023-06-20 23:40:43,271][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1626112. Throughput: 0: 280.8. Samples: 407430. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:43,272][33484] Avg episode reward: [(0, '7.012')] +[2023-06-20 23:40:43,275][33878] Saving new best policy, reward=7.012! +[2023-06-20 23:40:48,273][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1630208. Throughput: 0: 280.7. Samples: 408274. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:48,275][33484] Avg episode reward: [(0, '6.848')] +[2023-06-20 23:40:53,271][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1634304. Throughput: 0: 280.5. Samples: 409952. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:40:53,273][33484] Avg episode reward: [(0, '7.043')] +[2023-06-20 23:40:54,096][33878] Saving new best policy, reward=7.043! +[2023-06-20 23:40:54,098][33879] Updated weights for policy 0, policy_version 400 (0.0009) +[2023-06-20 23:40:58,272][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1642496. Throughput: 0: 280.9. Samples: 411636. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:40:58,273][33484] Avg episode reward: [(0, '7.271')] +[2023-06-20 23:40:58,274][33878] Saving new best policy, reward=7.271! +[2023-06-20 23:41:03,269][33484] Fps is (10 sec: 1229.0, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1646592. Throughput: 0: 281.2. Samples: 412486. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:41:03,271][33484] Avg episode reward: [(0, '7.420')] +[2023-06-20 23:41:04,894][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000403_1650688.pth... 
+[2023-06-20 23:41:04,954][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000337_1380352.pth +[2023-06-20 23:41:04,959][33878] Saving new best policy, reward=7.420! +[2023-06-20 23:41:08,271][33484] Fps is (10 sec: 819.3, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1650688. Throughput: 0: 281.3. Samples: 414176. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:41:08,272][33484] Avg episode reward: [(0, '7.554')] +[2023-06-20 23:41:08,632][33878] Saving new best policy, reward=7.554! +[2023-06-20 23:41:13,272][33484] Fps is (10 sec: 1228.4, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1658880. Throughput: 0: 281.4. Samples: 415868. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:13,274][33484] Avg episode reward: [(0, '7.491')] +[2023-06-20 23:41:18,272][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1662976. Throughput: 0: 281.2. Samples: 416714. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:41:18,275][33484] Avg episode reward: [(0, '7.590')] +[2023-06-20 23:41:19,472][33878] Saving new best policy, reward=7.590! +[2023-06-20 23:41:23,265][33484] Fps is (10 sec: 1229.7, 60 sec: 1160.7, 300 sec: 1124.7). Total num frames: 1671168. Throughput: 0: 281.3. Samples: 418404. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:23,266][33484] Avg episode reward: [(0, '7.641')] +[2023-06-20 23:41:23,268][33878] Saving new best policy, reward=7.641! +[2023-06-20 23:41:28,271][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1675264. Throughput: 0: 281.1. Samples: 420078. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:28,272][33484] Avg episode reward: [(0, '7.815')] +[2023-06-20 23:41:28,273][33878] Saving new best policy, reward=7.815! +[2023-06-20 23:41:30,445][33879] Updated weights for policy 0, policy_version 410 (0.0008) +[2023-06-20 23:41:33,274][33484] Fps is (10 sec: 818.5, 60 sec: 1092.2, 300 sec: 1124.6). Total num frames: 1679360. Throughput: 0: 281.0. Samples: 420918. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:33,275][33484] Avg episode reward: [(0, '7.776')] +[2023-06-20 23:41:38,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1687552. Throughput: 0: 281.2. Samples: 422606. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:38,275][33484] Avg episode reward: [(0, '7.522')] +[2023-06-20 23:41:43,265][33484] Fps is (10 sec: 1229.9, 60 sec: 1092.4, 300 sec: 1124.7). Total num frames: 1691648. Throughput: 0: 281.2. Samples: 424288. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:43,266][33484] Avg episode reward: [(0, '7.659')] +[2023-06-20 23:41:48,274][33484] Fps is (10 sec: 819.1, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1695744. Throughput: 0: 280.9. Samples: 425126. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:48,281][33484] Avg episode reward: [(0, '7.368')] +[2023-06-20 23:41:53,273][33484] Fps is (10 sec: 1227.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1703936. Throughput: 0: 280.7. Samples: 426806. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:53,274][33484] Avg episode reward: [(0, '7.342')] +[2023-06-20 23:41:58,273][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1708032. Throughput: 0: 280.7. Samples: 428498. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:41:58,274][33484] Avg episode reward: [(0, '7.368')] +[2023-06-20 23:42:03,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1716224. Throughput: 0: 280.4. Samples: 429332. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:03,274][33484] Avg episode reward: [(0, '7.192')] +[2023-06-20 23:42:06,716][33879] Updated weights for policy 0, policy_version 420 (0.0009) +[2023-06-20 23:42:08,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1720320. Throughput: 0: 280.4. Samples: 431022. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:08,272][33484] Avg episode reward: [(0, '7.587')] +[2023-06-20 23:42:13,273][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1724416. Throughput: 0: 281.1. Samples: 432728. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:13,277][33484] Avg episode reward: [(0, '7.427')] +[2023-06-20 23:42:18,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1732608. Throughput: 0: 280.9. Samples: 433560. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:18,274][33484] Avg episode reward: [(0, '7.248')] +[2023-06-20 23:42:23,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.1, 300 sec: 1124.7). Total num frames: 1736704. Throughput: 0: 280.7. Samples: 435238. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:23,274][33484] Avg episode reward: [(0, '7.518')] +[2023-06-20 23:42:28,270][33484] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1740800. Throughput: 0: 280.6. Samples: 436914. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:28,274][33484] Avg episode reward: [(0, '7.358')] +[2023-06-20 23:42:33,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1748992. Throughput: 0: 280.8. Samples: 437762. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:33,274][33484] Avg episode reward: [(0, '7.270')] +[2023-06-20 23:42:38,272][33484] Fps is (10 sec: 1228.5, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1753088. Throughput: 0: 280.8. Samples: 439444. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:38,275][33484] Avg episode reward: [(0, '7.519')] +[2023-06-20 23:42:43,202][33879] Updated weights for policy 0, policy_version 430 (0.0010) +[2023-06-20 23:42:43,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.4, 300 sec: 1124.7). Total num frames: 1761280. Throughput: 0: 280.8. Samples: 441136. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:43,274][33484] Avg episode reward: [(0, '7.642')] +[2023-06-20 23:42:48,273][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1765376. Throughput: 0: 281.3. Samples: 441992. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:48,273][33484] Avg episode reward: [(0, '7.837')] +[2023-06-20 23:42:48,274][33878] Saving new best policy, reward=7.837! +[2023-06-20 23:42:53,271][33484] Fps is (10 sec: 819.4, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1769472. Throughput: 0: 281.3. Samples: 443680. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:53,274][33484] Avg episode reward: [(0, '7.752')] +[2023-06-20 23:42:58,269][33484] Fps is (10 sec: 1229.3, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1777664. Throughput: 0: 281.0. Samples: 445372. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:42:58,270][33484] Avg episode reward: [(0, '7.669')] +[2023-06-20 23:43:03,272][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1781760. Throughput: 0: 281.0. Samples: 446206. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:03,275][33484] Avg episode reward: [(0, '7.729')] +[2023-06-20 23:43:04,986][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000436_1785856.pth... +[2023-06-20 23:43:05,043][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000370_1515520.pth +[2023-06-20 23:43:08,273][33484] Fps is (10 sec: 818.9, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1785856. Throughput: 0: 281.3. Samples: 447896. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:08,281][33484] Avg episode reward: [(0, '7.774')] +[2023-06-20 23:43:13,269][33484] Fps is (10 sec: 1229.2, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1794048. Throughput: 0: 281.5. Samples: 449582. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:13,271][33484] Avg episode reward: [(0, '7.758')] +[2023-06-20 23:43:18,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1798144. Throughput: 0: 281.6. Samples: 450432. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:18,274][33484] Avg episode reward: [(0, '8.138')] +[2023-06-20 23:43:19,561][33878] Saving new best policy, reward=8.138! +[2023-06-20 23:43:19,562][33879] Updated weights for policy 0, policy_version 440 (0.0010) +[2023-06-20 23:43:23,273][33484] Fps is (10 sec: 1228.3, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1806336. Throughput: 0: 282.0. Samples: 452132. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:23,274][33484] Avg episode reward: [(0, '8.336')] +[2023-06-20 23:43:23,276][33878] Saving new best policy, reward=8.336! +[2023-06-20 23:43:28,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1810432. Throughput: 0: 282.0. Samples: 453824. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:28,275][33484] Avg episode reward: [(0, '8.494')] +[2023-06-20 23:43:28,276][33878] Saving new best policy, reward=8.494! +[2023-06-20 23:43:33,273][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1814528. Throughput: 0: 281.6. Samples: 454664. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:33,274][33484] Avg episode reward: [(0, '8.631')] +[2023-06-20 23:43:34,145][33878] Saving new best policy, reward=8.631! +[2023-06-20 23:43:38,273][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.7). Total num frames: 1822720. Throughput: 0: 281.7. Samples: 456358. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:38,274][33484] Avg episode reward: [(0, '8.459')] +[2023-06-20 23:43:43,272][33484] Fps is (10 sec: 1229.0, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1826816. Throughput: 0: 281.6. Samples: 458044. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:43,274][33484] Avg episode reward: [(0, '9.078')] +[2023-06-20 23:43:45,168][33878] Saving new best policy, reward=9.078! +[2023-06-20 23:43:48,273][33484] Fps is (10 sec: 819.2, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1830912. Throughput: 0: 281.9. Samples: 458892. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:48,276][33484] Avg episode reward: [(0, '9.160')] +[2023-06-20 23:43:48,797][33878] Saving new best policy, reward=9.160! +[2023-06-20 23:43:53,272][33484] Fps is (10 sec: 1228.8, 60 sec: 1160.5, 300 sec: 1124.6). Total num frames: 1839104. Throughput: 0: 281.8. Samples: 460578. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:53,274][33484] Avg episode reward: [(0, '9.153')] +[2023-06-20 23:43:56,076][33879] Updated weights for policy 0, policy_version 450 (0.0010) +[2023-06-20 23:43:58,272][33484] Fps is (10 sec: 1228.9, 60 sec: 1092.2, 300 sec: 1124.7). Total num frames: 1843200. Throughput: 0: 282.0. Samples: 462272. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:43:58,273][33484] Avg episode reward: [(0, '9.398')] +[2023-06-20 23:43:59,699][33878] Saving new best policy, reward=9.398! +[2023-06-20 23:44:03,267][33484] Fps is (10 sec: 819.6, 60 sec: 1092.4, 300 sec: 1110.8). Total num frames: 1847296. Throughput: 0: 281.7. Samples: 463106. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-20 23:44:03,268][33484] Avg episode reward: [(0, '10.152')] +[2023-06-20 23:44:03,417][33878] Saving new best policy, reward=10.152! +[2023-06-20 23:44:08,270][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1855488. Throughput: 0: 281.4. Samples: 464796. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:44:08,272][33484] Avg episode reward: [(0, '10.534')] +[2023-06-20 23:44:08,273][33878] Saving new best policy, reward=10.534! +[2023-06-20 23:44:13,268][33484] Fps is (10 sec: 1228.6, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1859584. Throughput: 0: 281.3. Samples: 466480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:44:13,269][33484] Avg episode reward: [(0, '10.468')] +[2023-06-20 23:44:18,269][33484] Fps is (10 sec: 1229.0, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1867776. Throughput: 0: 282.4. Samples: 467372. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-06-20 23:44:18,272][33484] Avg episode reward: [(0, '10.098')] +[2023-06-20 23:44:23,269][33484] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1124.7). Total num frames: 1871872. Throughput: 0: 287.1. Samples: 469278. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:44:23,272][33484] Avg episode reward: [(0, '10.173')] +[2023-06-20 23:44:28,268][33484] Fps is (10 sec: 1228.9, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 1880064. Throughput: 0: 292.0. Samples: 471182. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:44:28,271][33484] Avg episode reward: [(0, '10.363')] +[2023-06-20 23:44:30,783][33879] Updated weights for policy 0, policy_version 460 (0.0013) +[2023-06-20 23:44:33,270][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1124.7). Total num frames: 1884160. Throughput: 0: 294.2. Samples: 472130. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:44:33,272][33484] Avg episode reward: [(0, '10.514')] +[2023-06-20 23:44:38,270][33484] Fps is (10 sec: 1228.7, 60 sec: 1160.6, 300 sec: 1138.6). Total num frames: 1892352. Throughput: 0: 299.1. Samples: 474036. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-06-20 23:44:38,279][33484] Avg episode reward: [(0, '10.696')] +[2023-06-20 23:44:40,378][33878] Saving new best policy, reward=10.696! +[2023-06-20 23:44:41,341][33484] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 33484], exiting... 
+[2023-06-20 23:44:41,343][33878] Stopping Batcher_0... +[2023-06-20 23:44:41,343][33484] Runner profile tree view: +main_loop: 1657.2042 +[2023-06-20 23:44:41,343][33484] Collected {0: 1896448}, FPS: 1144.4 +[2023-06-20 23:44:41,343][33878] Loop batcher_evt_loop terminating... +[2023-06-20 23:44:41,344][33878] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000463_1896448.pth... +[2023-06-20 23:44:41,451][33878] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000403_1650688.pth +[2023-06-20 23:44:41,471][33878] Stopping LearnerWorker_p0... +[2023-06-20 23:44:41,473][33878] Loop learner_proc0_evt_loop terminating... +[2023-06-20 23:44:41,492][33879] Weights refcount: 2 0 +[2023-06-20 23:44:41,515][33879] Stopping InferenceWorker_p0-w0... +[2023-06-20 23:44:41,516][33879] Loop inference_proc0-0_evt_loop terminating... +[2023-06-20 23:44:41,599][33881] Stopping RolloutWorker_w1... +[2023-06-20 23:44:41,600][33881] Loop rollout_proc1_evt_loop terminating... +[2023-06-20 23:44:41,626][33887] Stopping RolloutWorker_w6... +[2023-06-20 23:44:41,631][33887] Loop rollout_proc6_evt_loop terminating... +[2023-06-20 23:44:41,631][33888] Stopping RolloutWorker_w7... +[2023-06-20 23:44:41,632][33888] Loop rollout_proc7_evt_loop terminating... +[2023-06-20 23:44:41,635][33883] Stopping RolloutWorker_w3... +[2023-06-20 23:44:41,635][33883] Loop rollout_proc3_evt_loop terminating... +[2023-06-20 23:44:41,635][33882] Stopping RolloutWorker_w2... +[2023-06-20 23:44:41,636][33882] Loop rollout_proc2_evt_loop terminating... +[2023-06-20 23:44:41,652][33885] Stopping RolloutWorker_w5... +[2023-06-20 23:44:41,655][33885] Loop rollout_proc5_evt_loop terminating... +[2023-06-20 23:44:41,662][33880] Stopping RolloutWorker_w0... +[2023-06-20 23:44:41,663][33880] Loop rollout_proc0_evt_loop terminating... +[2023-06-20 23:44:41,673][33884] Stopping RolloutWorker_w4... +[2023-06-20 23:44:41,674][33884] Loop rollout_proc4_evt_loop terminating... +[2023-06-20 23:44:47,471][33484] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-20 23:44:47,472][33484] Overriding arg 'num_workers' with value 1 passed from command line +[2023-06-20 23:44:47,473][33484] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-06-20 23:44:47,473][33484] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-06-20 23:44:47,473][33484] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-06-20 23:44:47,473][33484] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-06-20 23:44:47,474][33484] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-06-20 23:44:47,474][33484] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-06-20 23:44:47,474][33484] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-06-20 23:44:47,475][33484] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-06-20 23:44:47,475][33484] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-06-20 23:44:47,475][33484] Adding new argument 'eval_deterministic'=False that is not in the saved config file! 
+[2023-06-20 23:44:47,475][33484] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-06-20 23:44:47,475][33484] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-06-20 23:44:47,476][33484] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-06-20 23:44:47,481][33484] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-20 23:44:47,482][33484] RunningMeanStd input shape: (3, 72, 128) +[2023-06-20 23:44:47,484][33484] RunningMeanStd input shape: (1,) +[2023-06-20 23:44:47,511][33484] ConvEncoder: input_channels=3 +[2023-06-20 23:44:47,574][33484] Conv encoder output size: 512 +[2023-06-20 23:44:47,575][33484] Policy head output size: 512 +[2023-06-20 23:44:47,591][33484] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000463_1896448.pth... +[2023-06-20 23:44:49,664][33484] Num frames 100... +[2023-06-20 23:44:50,448][33484] Num frames 200... +[2023-06-20 23:44:51,234][33484] Num frames 300... +[2023-06-20 23:44:51,955][33484] Num frames 400... +[2023-06-20 23:44:52,664][33484] Avg episode rewards: #0: 7.800, true rewards: #0: 4.800 +[2023-06-20 23:44:52,667][33484] Avg episode reward: 7.800, avg true_objective: 4.800 +[2023-06-20 23:44:52,819][33484] Num frames 500... +[2023-06-20 23:44:53,621][33484] Num frames 600... +[2023-06-20 23:44:54,427][33484] Num frames 700... +[2023-06-20 23:44:55,186][33484] Num frames 800... +[2023-06-20 23:44:55,921][33484] Num frames 900... +[2023-06-20 23:44:56,222][33484] Avg episode rewards: #0: 6.640, true rewards: #0: 4.640 +[2023-06-20 23:44:56,225][33484] Avg episode reward: 6.640, avg true_objective: 4.640 +[2023-06-20 23:44:56,779][33484] Num frames 1000... +[2023-06-20 23:44:57,576][33484] Num frames 1100... +[2023-06-20 23:44:58,334][33484] Num frames 1200... +[2023-06-20 23:44:59,149][33484] Num frames 1300... +[2023-06-20 23:45:00,016][33484] Num frames 1400... +[2023-06-20 23:45:00,857][33484] Num frames 1500... +[2023-06-20 23:45:01,769][33484] Num frames 1600... +[2023-06-20 23:45:02,716][33484] Num frames 1700... +[2023-06-20 23:45:03,653][33484] Num frames 1800... +[2023-06-20 23:45:04,425][33484] Num frames 1900... +[2023-06-20 23:45:04,924][33484] Avg episode rewards: #0: 12.507, true rewards: #0: 6.507 +[2023-06-20 23:45:04,926][33484] Avg episode reward: 12.507, avg true_objective: 6.507 +[2023-06-20 23:45:05,279][33484] Num frames 2000... +[2023-06-20 23:45:06,004][33484] Num frames 2100... +[2023-06-20 23:45:06,806][33484] Num frames 2200... +[2023-06-20 23:45:07,630][33484] Num frames 2300... +[2023-06-20 23:45:08,433][33484] Num frames 2400... +[2023-06-20 23:45:09,250][33484] Num frames 2500... +[2023-06-20 23:45:10,056][33484] Num frames 2600... +[2023-06-20 23:45:10,861][33484] Num frames 2700... +[2023-06-20 23:45:11,629][33484] Num frames 2800... +[2023-06-20 23:45:12,371][33484] Num frames 2900... +[2023-06-20 23:45:12,545][33484] Avg episode rewards: #0: 14.030, true rewards: #0: 7.280 +[2023-06-20 23:45:12,547][33484] Avg episode reward: 14.030, avg true_objective: 7.280 +[2023-06-20 23:45:13,285][33484] Num frames 3000... +[2023-06-20 23:45:14,036][33484] Num frames 3100... +[2023-06-20 23:45:14,779][33484] Num frames 3200... +[2023-06-20 23:45:15,525][33484] Num frames 3300... 
+[2023-06-20 23:45:16,325][33484] Avg episode rewards: #0: 12.784, true rewards: #0: 6.784 +[2023-06-20 23:45:16,327][33484] Avg episode reward: 12.784, avg true_objective: 6.784 +[2023-06-20 23:45:16,384][33484] Num frames 3400... +[2023-06-20 23:45:17,111][33484] Num frames 3500... +[2023-06-20 23:45:17,877][33484] Num frames 3600... +[2023-06-20 23:45:18,629][33484] Num frames 3700... +[2023-06-20 23:45:19,408][33484] Num frames 3800... +[2023-06-20 23:45:20,213][33484] Num frames 3900... +[2023-06-20 23:45:20,845][33484] Avg episode rewards: #0: 12.447, true rewards: #0: 6.613 +[2023-06-20 23:45:20,847][33484] Avg episode reward: 12.447, avg true_objective: 6.613 +[2023-06-20 23:45:21,080][33484] Num frames 4000... +[2023-06-20 23:45:21,808][33484] Num frames 4100... +[2023-06-20 23:45:22,535][33484] Num frames 4200... +[2023-06-20 23:45:23,270][33484] Num frames 4300... +[2023-06-20 23:45:23,977][33484] Num frames 4400... +[2023-06-20 23:45:24,687][33484] Num frames 4500... +[2023-06-20 23:45:25,411][33484] Num frames 4600... +[2023-06-20 23:45:26,168][33484] Num frames 4700... +[2023-06-20 23:45:26,509][33484] Avg episode rewards: #0: 12.480, true rewards: #0: 6.766 +[2023-06-20 23:45:26,510][33484] Avg episode reward: 12.480, avg true_objective: 6.766 +[2023-06-20 23:45:26,982][33484] Num frames 4800... +[2023-06-20 23:45:27,749][33484] Num frames 4900... +[2023-06-20 23:45:28,513][33484] Num frames 5000... +[2023-06-20 23:45:29,311][33484] Num frames 5100... +[2023-06-20 23:45:30,107][33484] Num frames 5200... +[2023-06-20 23:45:30,914][33484] Num frames 5300... +[2023-06-20 23:45:31,679][33484] Num frames 5400... +[2023-06-20 23:45:32,515][33484] Num frames 5500... +[2023-06-20 23:45:33,445][33484] Num frames 5600... +[2023-06-20 23:45:34,211][33484] Num frames 5700... +[2023-06-20 23:45:35,071][33484] Num frames 5800... +[2023-06-20 23:45:35,915][33484] Avg episode rewards: #0: 13.735, true rewards: #0: 7.360 +[2023-06-20 23:45:35,918][33484] Avg episode reward: 13.735, avg true_objective: 7.360 +[2023-06-20 23:45:36,019][33484] Num frames 5900... +[2023-06-20 23:45:36,912][33484] Num frames 6000... +[2023-06-20 23:45:37,805][33484] Num frames 6100... +[2023-06-20 23:45:38,704][33484] Num frames 6200... +[2023-06-20 23:45:39,621][33484] Num frames 6300... +[2023-06-20 23:45:40,578][33484] Num frames 6400... +[2023-06-20 23:45:41,536][33484] Num frames 6500... +[2023-06-20 23:45:42,449][33484] Num frames 6600... +[2023-06-20 23:45:43,406][33484] Num frames 6700... +[2023-06-20 23:45:44,418][33484] Num frames 6800... +[2023-06-20 23:45:45,331][33484] Num frames 6900... +[2023-06-20 23:45:46,033][33484] Avg episode rewards: #0: 14.751, true rewards: #0: 7.751 +[2023-06-20 23:45:46,033][33484] Avg episode reward: 14.751, avg true_objective: 7.751 +[2023-06-20 23:45:46,198][33484] Num frames 7000... +[2023-06-20 23:45:46,950][33484] Num frames 7100... +[2023-06-20 23:45:47,679][33484] Num frames 7200... +[2023-06-20 23:45:48,463][33484] Num frames 7300... +[2023-06-20 23:45:49,250][33484] Num frames 7400... +[2023-06-20 23:45:49,994][33484] Num frames 7500... +[2023-06-20 23:45:50,680][33484] Avg episode rewards: #0: 14.384, true rewards: #0: 7.584 +[2023-06-20 23:45:50,683][33484] Avg episode reward: 14.384, avg true_objective: 7.584 +[2023-06-20 23:45:54,782][33484] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4! 
+[2023-06-21 00:02:44,504][62782] Saving configuration to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json... +[2023-06-21 00:02:44,528][62782] Rollout worker 0 uses device cpu +[2023-06-21 00:02:44,529][62782] Rollout worker 1 uses device cpu +[2023-06-21 00:02:44,529][62782] Rollout worker 2 uses device cpu +[2023-06-21 00:02:44,530][62782] Rollout worker 3 uses device cpu +[2023-06-21 00:02:44,530][62782] Rollout worker 4 uses device cpu +[2023-06-21 00:02:44,530][62782] Rollout worker 5 uses device cpu +[2023-06-21 00:02:44,531][62782] Rollout worker 6 uses device cpu +[2023-06-21 00:02:44,531][62782] Rollout worker 7 uses device cpu +[2023-06-21 00:02:44,719][62782] InferenceWorker_p0-w0: min num requests: 2 +[2023-06-21 00:02:44,752][62782] Starting all processes... +[2023-06-21 00:02:44,753][62782] Starting process learner_proc0 +[2023-06-21 00:02:44,803][62782] Starting all processes... +[2023-06-21 00:02:44,856][62782] Starting process inference_proc0-0 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc0 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc1 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc2 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc3 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc4 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc5 +[2023-06-21 00:02:44,856][62782] Starting process rollout_proc6 +[2023-06-21 00:02:44,858][62782] Starting process rollout_proc7 +[2023-06-21 00:02:46,946][62893] Starting seed is not provided +[2023-06-21 00:02:46,946][62893] Initializing actor-critic model on device cpu +[2023-06-21 00:02:46,946][62893] RunningMeanStd input shape: (3, 72, 128) +[2023-06-21 00:02:46,947][62896] On MacOS, not setting affinity +[2023-06-21 00:02:46,947][62893] RunningMeanStd input shape: (1,) +[2023-06-21 00:02:46,966][62893] ConvEncoder: input_channels=3 +[2023-06-21 00:02:47,013][62903] On MacOS, not setting affinity +[2023-06-21 00:02:47,013][62898] On MacOS, not setting affinity +[2023-06-21 00:02:47,013][62897] On MacOS, not setting affinity +[2023-06-21 00:02:47,024][62901] On MacOS, not setting affinity +[2023-06-21 00:02:47,049][62899] On MacOS, not setting affinity +[2023-06-21 00:02:47,066][62902] On MacOS, not setting affinity +[2023-06-21 00:02:47,069][62893] Conv encoder output size: 512 +[2023-06-21 00:02:47,070][62893] Policy head output size: 512 +[2023-06-21 00:02:47,091][62893] Created Actor Critic model with architecture: +[2023-06-21 00:02:47,091][62893] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + 
(core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-06-21 00:02:47,095][62893] Using optimizer +[2023-06-21 00:02:47,095][62893] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000463_1896448.pth... +[2023-06-21 00:02:47,102][62900] On MacOS, not setting affinity +[2023-06-21 00:02:47,125][62893] Loading model from checkpoint +[2023-06-21 00:02:47,135][62893] Loaded experiment state at self.train_step=463, self.env_steps=1896448 +[2023-06-21 00:02:47,136][62893] Initialized policy 0 weights for model version 463 +[2023-06-21 00:02:47,137][62893] LearnerWorker_p0 finished initialization! +[2023-06-21 00:02:47,139][62895] RunningMeanStd input shape: (3, 72, 128) +[2023-06-21 00:02:47,140][62895] RunningMeanStd input shape: (1,) +[2023-06-21 00:02:47,155][62895] ConvEncoder: input_channels=3 +[2023-06-21 00:02:47,207][62895] Conv encoder output size: 512 +[2023-06-21 00:02:47,207][62895] Policy head output size: 512 +[2023-06-21 00:02:47,219][62782] Inference worker 0-0 is ready! +[2023-06-21 00:02:47,221][62782] All inference workers are ready! Signal rollout workers to start! +[2023-06-21 00:02:47,263][62899] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,270][62901] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,270][62903] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,272][62900] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,273][62902] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,280][62897] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,282][62898] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:47,285][62896] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 00:02:48,705][62782] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 1896448. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-21 00:02:49,591][62898] Decorrelating experience for 0 frames... +[2023-06-21 00:02:49,591][62899] Decorrelating experience for 0 frames... +[2023-06-21 00:02:49,592][62901] Decorrelating experience for 0 frames... +[2023-06-21 00:02:49,592][62902] Decorrelating experience for 0 frames... +[2023-06-21 00:02:49,593][62900] Decorrelating experience for 0 frames... +[2023-06-21 00:02:49,595][62903] Decorrelating experience for 0 frames... +[2023-06-21 00:02:50,546][62900] Decorrelating experience for 32 frames... +[2023-06-21 00:02:50,573][62899] Decorrelating experience for 32 frames... +[2023-06-21 00:02:50,574][62898] Decorrelating experience for 32 frames... +[2023-06-21 00:02:50,574][62902] Decorrelating experience for 32 frames... +[2023-06-21 00:02:50,575][62901] Decorrelating experience for 32 frames... +[2023-06-21 00:02:50,583][62897] Decorrelating experience for 0 frames... +[2023-06-21 00:02:51,245][62897] Decorrelating experience for 32 frames... +[2023-06-21 00:02:51,246][62896] Decorrelating experience for 0 frames... +[2023-06-21 00:02:51,955][62896] Decorrelating experience for 32 frames... +[2023-06-21 00:02:51,957][62903] Decorrelating experience for 32 frames... 
+[2023-06-21 00:02:52,185][62898] Decorrelating experience for 64 frames... +[2023-06-21 00:02:52,187][62901] Decorrelating experience for 64 frames... +[2023-06-21 00:02:52,187][62900] Decorrelating experience for 64 frames... +[2023-06-21 00:02:52,191][62902] Decorrelating experience for 64 frames... +[2023-06-21 00:02:52,640][62899] Decorrelating experience for 64 frames... +[2023-06-21 00:02:52,707][62897] Decorrelating experience for 64 frames... +[2023-06-21 00:02:53,472][62903] Decorrelating experience for 64 frames... +[2023-06-21 00:02:53,474][62896] Decorrelating experience for 64 frames... +[2023-06-21 00:02:53,706][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1896448. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-21 00:02:54,695][62900] Decorrelating experience for 96 frames... +[2023-06-21 00:02:54,696][62902] Decorrelating experience for 96 frames... +[2023-06-21 00:02:54,700][62901] Decorrelating experience for 96 frames... +[2023-06-21 00:02:54,702][62898] Decorrelating experience for 96 frames... +[2023-06-21 00:02:55,417][62899] Decorrelating experience for 96 frames... +[2023-06-21 00:02:55,419][62897] Decorrelating experience for 96 frames... +[2023-06-21 00:02:55,751][62896] Decorrelating experience for 96 frames... +[2023-06-21 00:02:56,156][62903] Decorrelating experience for 96 frames... +[2023-06-21 00:02:58,705][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1896448. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-21 00:02:58,708][62782] Avg episode reward: [(0, '0.320')] +[2023-06-21 00:03:03,705][62782] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1896448. Throughput: 0: 100.1. Samples: 1502. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-06-21 00:03:03,708][62782] Avg episode reward: [(0, '1.444')] +[2023-06-21 00:03:04,710][62782] Heartbeat connected on Batcher_0 +[2023-06-21 00:03:04,714][62782] Heartbeat connected on LearnerWorker_p0 +[2023-06-21 00:03:04,740][62782] Heartbeat connected on InferenceWorker_p0-w0 +[2023-06-21 00:03:04,783][62782] Heartbeat connected on RolloutWorker_w0 +[2023-06-21 00:03:04,789][62782] Heartbeat connected on RolloutWorker_w3 +[2023-06-21 00:03:04,789][62782] Heartbeat connected on RolloutWorker_w4 +[2023-06-21 00:03:04,810][62782] Heartbeat connected on RolloutWorker_w1 +[2023-06-21 00:03:04,812][62782] Heartbeat connected on RolloutWorker_w2 +[2023-06-21 00:03:04,817][62782] Heartbeat connected on RolloutWorker_w5 +[2023-06-21 00:03:04,823][62782] Heartbeat connected on RolloutWorker_w6 +[2023-06-21 00:03:04,824][62782] Heartbeat connected on RolloutWorker_w7 +[2023-06-21 00:03:08,704][62782] Fps is (10 sec: 819.3, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 1904640. Throughput: 0: 170.6. Samples: 3412. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-06-21 00:03:08,705][62782] Avg episode reward: [(0, '2.944')] +[2023-06-21 00:03:13,703][62782] Fps is (10 sec: 1638.7, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 1912832. Throughput: 0: 174.4. Samples: 4360. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:03:13,706][62782] Avg episode reward: [(0, '4.354')] +[2023-06-21 00:03:18,705][62782] Fps is (10 sec: 1228.7, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 1916928. Throughput: 0: 208.9. Samples: 6268. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:18,705][62782] Avg episode reward: [(0, '6.182')] +[2023-06-21 00:03:23,700][62782] Fps is (10 sec: 1229.1, 60 sec: 819.3, 300 sec: 819.3). Total num frames: 1925120. Throughput: 0: 233.3. Samples: 8166. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:23,703][62782] Avg episode reward: [(0, '7.207')] +[2023-06-21 00:03:28,709][62782] Fps is (10 sec: 1228.4, 60 sec: 819.1, 300 sec: 819.1). Total num frames: 1929216. Throughput: 0: 228.2. Samples: 9130. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:28,713][62782] Avg episode reward: [(0, '7.606')] +[2023-06-21 00:03:32,792][62895] Updated weights for policy 0, policy_version 473 (0.0011) +[2023-06-21 00:03:33,704][62782] Fps is (10 sec: 1228.4, 60 sec: 910.2, 300 sec: 910.2). Total num frames: 1937408. Throughput: 0: 245.0. Samples: 11026. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:33,705][62782] Avg episode reward: [(0, '8.380')] +[2023-06-21 00:03:38,704][62782] Fps is (10 sec: 1229.3, 60 sec: 901.1, 300 sec: 901.1). Total num frames: 1941504. Throughput: 0: 287.3. Samples: 12928. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:03:38,707][62782] Avg episode reward: [(0, '8.388')] +[2023-06-21 00:03:43,704][62782] Fps is (10 sec: 1228.8, 60 sec: 968.2, 300 sec: 968.2). Total num frames: 1949696. Throughput: 0: 308.5. Samples: 13884. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:43,705][62782] Avg episode reward: [(0, '8.642')] +[2023-06-21 00:03:48,705][62782] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 955.7). Total num frames: 1953792. Throughput: 0: 317.3. Samples: 15780. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:48,709][62782] Avg episode reward: [(0, '8.993')] +[2023-06-21 00:03:53,705][62782] Fps is (10 sec: 1228.7, 60 sec: 1092.3, 300 sec: 1008.2). Total num frames: 1961984. Throughput: 0: 316.7. Samples: 17664. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:03:53,708][62782] Avg episode reward: [(0, '9.600')] +[2023-06-21 00:03:58,702][62782] Fps is (10 sec: 1229.2, 60 sec: 1160.6, 300 sec: 994.8). Total num frames: 1966080. Throughput: 0: 316.6. Samples: 18606. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:03:58,705][62782] Avg episode reward: [(0, '10.099')] +[2023-06-21 00:04:03,704][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1037.7). Total num frames: 1974272. Throughput: 0: 316.5. Samples: 20512. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:04:03,706][62782] Avg episode reward: [(0, '9.877')] +[2023-06-21 00:04:05,292][62895] Updated weights for policy 0, policy_version 483 (0.0016) +[2023-06-21 00:04:08,705][62782] Fps is (10 sec: 1637.9, 60 sec: 1297.0, 300 sec: 1075.2). Total num frames: 1982464. Throughput: 0: 316.5. Samples: 22410. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:04:08,707][62782] Avg episode reward: [(0, '9.789')] +[2023-06-21 00:04:13,704][62782] Fps is (10 sec: 1228.8, 60 sec: 1228.8, 300 sec: 1060.2). Total num frames: 1986560. Throughput: 0: 316.4. Samples: 23366. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:04:13,706][62782] Avg episode reward: [(0, '9.602')] +[2023-06-21 00:04:18,704][62782] Fps is (10 sec: 1228.9, 60 sec: 1297.1, 300 sec: 1092.3). Total num frames: 1994752. Throughput: 0: 316.5. Samples: 25270. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-06-21 00:04:18,707][62782] Avg episode reward: [(0, '9.301')] +[2023-06-21 00:04:23,705][62782] Fps is (10 sec: 1228.7, 60 sec: 1228.7, 300 sec: 1077.9). Total num frames: 1998848. Throughput: 0: 316.6. Samples: 27176. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-06-21 00:04:23,709][62782] Avg episode reward: [(0, '9.217')] +[2023-06-21 00:04:27,884][62893] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth... +[2023-06-21 00:04:27,890][62782] Component Batcher_0 stopped! +[2023-06-21 00:04:27,884][62893] Stopping Batcher_0... +[2023-06-21 00:04:27,910][62893] Loop batcher_evt_loop terminating... +[2023-06-21 00:04:28,051][62895] Weights refcount: 2 0 +[2023-06-21 00:04:28,052][62895] Stopping InferenceWorker_p0-w0... +[2023-06-21 00:04:28,052][62895] Loop inference_proc0-0_evt_loop terminating... +[2023-06-21 00:04:28,052][62782] Component InferenceWorker_p0-w0 stopped! +[2023-06-21 00:04:28,059][62893] Removing /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000436_1785856.pth +[2023-06-21 00:04:28,093][62893] Saving /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth... +[2023-06-21 00:04:28,146][62898] Stopping RolloutWorker_w2... +[2023-06-21 00:04:28,147][62898] Loop rollout_proc2_evt_loop terminating... +[2023-06-21 00:04:28,149][62897] Stopping RolloutWorker_w1... +[2023-06-21 00:04:28,146][62782] Component RolloutWorker_w2 stopped! +[2023-06-21 00:04:28,159][62897] Loop rollout_proc1_evt_loop terminating... +[2023-06-21 00:04:28,159][62782] Component RolloutWorker_w1 stopped! +[2023-06-21 00:04:28,172][62902] Stopping RolloutWorker_w6... +[2023-06-21 00:04:28,173][62902] Loop rollout_proc6_evt_loop terminating... +[2023-06-21 00:04:28,172][62903] Stopping RolloutWorker_w7... +[2023-06-21 00:04:28,173][62903] Loop rollout_proc7_evt_loop terminating... +[2023-06-21 00:04:28,172][62782] Component RolloutWorker_w6 stopped! +[2023-06-21 00:04:28,174][62782] Component RolloutWorker_w7 stopped! +[2023-06-21 00:04:28,178][62900] Stopping RolloutWorker_w4... +[2023-06-21 00:04:28,179][62782] Component RolloutWorker_w4 stopped! +[2023-06-21 00:04:28,179][62900] Loop rollout_proc4_evt_loop terminating... +[2023-06-21 00:04:28,186][62896] Stopping RolloutWorker_w0... +[2023-06-21 00:04:28,187][62896] Loop rollout_proc0_evt_loop terminating... +[2023-06-21 00:04:28,189][62782] Component RolloutWorker_w0 stopped! +[2023-06-21 00:04:28,238][62899] Stopping RolloutWorker_w3... +[2023-06-21 00:04:28,238][62899] Loop rollout_proc3_evt_loop terminating... +[2023-06-21 00:04:28,238][62782] Component RolloutWorker_w3 stopped! +[2023-06-21 00:04:28,260][62901] Stopping RolloutWorker_w5... +[2023-06-21 00:04:28,262][62901] Loop rollout_proc5_evt_loop terminating... +[2023-06-21 00:04:28,260][62782] Component RolloutWorker_w5 stopped! +[2023-06-21 00:04:28,323][62893] Stopping LearnerWorker_p0... +[2023-06-21 00:04:28,323][62893] Loop learner_proc0_evt_loop terminating... +[2023-06-21 00:04:28,323][62782] Component LearnerWorker_p0 stopped! +[2023-06-21 00:04:28,325][62782] Waiting for process learner_proc0 to stop... +[2023-06-21 00:04:28,717][62782] Waiting for process inference_proc0-0 to join... +[2023-06-21 00:04:28,724][62782] Waiting for process rollout_proc0 to join... +[2023-06-21 00:04:28,725][62782] Waiting for process rollout_proc1 to join... 
+[2023-06-21 00:04:28,725][62782] Waiting for process rollout_proc2 to join... +[2023-06-21 00:04:28,725][62782] Waiting for process rollout_proc3 to join... +[2023-06-21 00:04:28,726][62782] Waiting for process rollout_proc4 to join... +[2023-06-21 00:04:28,726][62782] Waiting for process rollout_proc5 to join... +[2023-06-21 00:04:28,727][62782] Waiting for process rollout_proc6 to join... +[2023-06-21 00:04:28,727][62782] Waiting for process rollout_proc7 to join... +[2023-06-21 00:04:28,727][62782] Batcher 0 profile tree view: +batching: 0.3430, releasing_batches: 0.0005 +[2023-06-21 00:04:28,728][62782] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0015 + wait_policy_total: 70.9955 +update_model: 0.1313 + weight_update: 0.0013 +one_step: 0.0044 + handle_policy_step: 28.8837 + deserialize: 0.2579, stack: 0.0465, obs_to_device_normalize: 1.9937, forward: 25.4962, send_messages: 0.2765 + prepare_outputs: 0.3301 + to_cpu: 0.0368 +[2023-06-21 00:04:28,728][62782] Learner 0 profile tree view: +misc: 0.0005, prepare_batch: 12.2746 +train: 41.6131 + epoch_init: 0.0001, minibatch_init: 0.0003, losses_postprocess: 0.0009, kl_divergence: 0.0039, after_optimizer: 0.0241 + calculate_losses: 23.6560 + losses_init: 0.0000, forward_head: 22.8739, bptt_initial: 0.0424, tail: 0.0327, advantages_returns: 0.0036, losses: 0.0138 + bptt: 0.6834 + bptt_forward_core: 0.6739 + update: 17.9093 + clip: 0.0352 +[2023-06-21 00:04:28,728][62782] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0024, enqueue_policy_requests: 0.2131, env_step: 89.0550, overhead: 0.1938, complete_rollouts: 0.0033 +save_policy_outputs: 0.0962 + split_output_tensors: 0.0471 +[2023-06-21 00:04:28,729][62782] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.0026, enqueue_policy_requests: 0.1817, env_step: 88.7206, overhead: 0.1951, complete_rollouts: 0.0031 +save_policy_outputs: 0.0956 + split_output_tensors: 0.0468 +[2023-06-21 00:04:28,730][62782] Loop Runner_EvtLoop terminating... +[2023-06-21 00:04:28,730][62782] Runner profile tree view: +main_loop: 103.9782 +[2023-06-21 00:04:28,730][62782] Collected {0: 2007040}, FPS: 1063.6 +[2023-06-21 01:10:24,615][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-21 01:10:24,616][62782] Overriding arg 'num_workers' with value 1 passed from command line +[2023-06-21 01:10:24,617][62782] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-06-21 01:10:24,617][62782] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-06-21 01:10:24,617][62782] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-06-21 01:10:24,618][62782] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-06-21 01:10:24,618][62782] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-06-21 01:10:24,618][62782] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-06-21 01:10:24,619][62782] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-06-21 01:10:24,619][62782] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-06-21 01:10:24,620][62782] Adding new argument 'policy_index'=0 that is not in the saved config file! 
+[2023-06-21 01:10:24,620][62782] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-06-21 01:10:24,621][62782] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-06-21 01:10:24,621][62782] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-06-21 01:10:24,621][62782] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-06-21 01:10:24,637][62782] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-06-21 01:10:24,642][62782] RunningMeanStd input shape: (3, 72, 128) +[2023-06-21 01:10:24,657][62782] RunningMeanStd input shape: (1,) +[2023-06-21 01:10:24,714][62782] ConvEncoder: input_channels=3 +[2023-06-21 01:10:24,809][62782] Conv encoder output size: 512 +[2023-06-21 01:10:24,809][62782] Policy head output size: 512 +[2023-06-21 01:10:24,831][62782] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth... +[2023-06-21 01:10:26,601][62782] Num frames 100... +[2023-06-21 01:10:27,376][62782] Num frames 200... +[2023-06-21 01:10:28,115][62782] Num frames 300... +[2023-06-21 01:10:28,566][62782] Avg episode rewards: #0: 3.450, true rewards: #0: 3.450 +[2023-06-21 01:10:28,569][62782] Avg episode reward: 3.450, avg true_objective: 3.450 +[2023-06-21 01:10:28,988][62782] Num frames 400... +[2023-06-21 01:10:29,799][62782] Num frames 500... +[2023-06-21 01:10:30,594][62782] Num frames 600... +[2023-06-21 01:10:31,385][62782] Num frames 700... +[2023-06-21 01:10:32,175][62782] Num frames 800... +[2023-06-21 01:10:32,923][62782] Num frames 900... +[2023-06-21 01:10:33,649][62782] Avg episode rewards: #0: 6.925, true rewards: #0: 4.925 +[2023-06-21 01:10:33,649][62782] Avg episode reward: 6.925, avg true_objective: 4.925 +[2023-06-21 01:10:33,754][62782] Num frames 1000... +[2023-06-21 01:10:34,512][62782] Num frames 1100... +[2023-06-21 01:10:35,298][62782] Num frames 1200... +[2023-06-21 01:10:36,117][62782] Num frames 1300... +[2023-06-21 01:10:36,907][62782] Num frames 1400... +[2023-06-21 01:10:37,689][62782] Num frames 1500... +[2023-06-21 01:10:38,511][62782] Num frames 1600... +[2023-06-21 01:10:39,326][62782] Num frames 1700... +[2023-06-21 01:10:40,127][62782] Num frames 1800... +[2023-06-21 01:10:40,908][62782] Num frames 1900... +[2023-06-21 01:10:41,701][62782] Num frames 2000... +[2023-06-21 01:10:42,373][62782] Avg episode rewards: #0: 11.243, true rewards: #0: 6.910 +[2023-06-21 01:10:42,375][62782] Avg episode reward: 11.243, avg true_objective: 6.910 +[2023-06-21 01:10:42,576][62782] Num frames 2100... +[2023-06-21 01:10:43,384][62782] Num frames 2200... +[2023-06-21 01:10:44,175][62782] Num frames 2300... +[2023-06-21 01:10:44,944][62782] Num frames 2400... +[2023-06-21 01:10:45,713][62782] Avg episode rewards: #0: 9.973, true rewards: #0: 6.222 +[2023-06-21 01:10:45,715][62782] Avg episode reward: 9.973, avg true_objective: 6.222 +[2023-06-21 01:10:45,808][62782] Num frames 2500... +[2023-06-21 01:10:46,590][62782] Num frames 2600... +[2023-06-21 01:10:47,418][62782] Num frames 2700... +[2023-06-21 01:10:48,230][62782] Num frames 2800... +[2023-06-21 01:10:49,030][62782] Num frames 2900... +[2023-06-21 01:10:49,785][62782] Num frames 3000... 
+[2023-06-21 01:10:49,889][62782] Avg episode rewards: #0: 9.802, true rewards: #0: 6.002 +[2023-06-21 01:10:49,890][62782] Avg episode reward: 9.802, avg true_objective: 6.002 +[2023-06-21 01:10:50,647][62782] Num frames 3100... +[2023-06-21 01:10:51,401][62782] Num frames 3200... +[2023-06-21 01:10:52,199][62782] Num frames 3300... +[2023-06-21 01:10:52,936][62782] Num frames 3400... +[2023-06-21 01:10:53,687][62782] Num frames 3500... +[2023-06-21 01:10:54,480][62782] Num frames 3600... +[2023-06-21 01:10:55,284][62782] Num frames 3700... +[2023-06-21 01:10:56,092][62782] Num frames 3800... +[2023-06-21 01:10:56,898][62782] Num frames 3900... +[2023-06-21 01:10:57,610][62782] Num frames 4000... +[2023-06-21 01:10:58,336][62782] Num frames 4100... +[2023-06-21 01:10:59,123][62782] Num frames 4200... +[2023-06-21 01:10:59,531][62782] Avg episode rewards: #0: 12.067, true rewards: #0: 7.067 +[2023-06-21 01:10:59,531][62782] Avg episode reward: 12.067, avg true_objective: 7.067 +[2023-06-21 01:10:59,947][62782] Num frames 4300... +[2023-06-21 01:11:00,664][62782] Num frames 4400... +[2023-06-21 01:11:01,403][62782] Num frames 4500... +[2023-06-21 01:11:02,173][62782] Avg episode rewards: #0: 10.989, true rewards: #0: 6.560 +[2023-06-21 01:11:02,175][62782] Avg episode reward: 10.989, avg true_objective: 6.560 +[2023-06-21 01:11:02,239][62782] Num frames 4600... +[2023-06-21 01:11:02,962][62782] Num frames 4700... +[2023-06-21 01:11:03,733][62782] Num frames 4800... +[2023-06-21 01:11:04,529][62782] Num frames 4900... +[2023-06-21 01:11:05,337][62782] Num frames 5000... +[2023-06-21 01:11:06,045][62782] Num frames 5100... +[2023-06-21 01:11:06,749][62782] Num frames 5200... +[2023-06-21 01:11:07,570][62782] Avg episode rewards: #0: 10.995, true rewards: #0: 6.620 +[2023-06-21 01:11:07,573][62782] Avg episode reward: 10.995, avg true_objective: 6.620 +[2023-06-21 01:11:07,606][62782] Num frames 5300... +[2023-06-21 01:11:08,368][62782] Num frames 5400... +[2023-06-21 01:11:09,164][62782] Num frames 5500... +[2023-06-21 01:11:09,968][62782] Num frames 5600... +[2023-06-21 01:11:10,739][62782] Num frames 5700... +[2023-06-21 01:11:11,134][62782] Avg episode rewards: #0: 10.382, true rewards: #0: 6.382 +[2023-06-21 01:11:11,135][62782] Avg episode reward: 10.382, avg true_objective: 6.382 +[2023-06-21 01:11:11,541][62782] Num frames 5800... +[2023-06-21 01:11:12,283][62782] Num frames 5900... +[2023-06-21 01:11:13,055][62782] Num frames 6000... +[2023-06-21 01:11:13,797][62782] Num frames 6100... +[2023-06-21 01:11:14,421][62782] Avg episode rewards: #0: 10.167, true rewards: #0: 6.167 +[2023-06-21 01:11:14,422][62782] Avg episode reward: 10.167, avg true_objective: 6.167 +[2023-06-21 01:11:27,119][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4! +[2023-06-21 01:11:57,079][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-21 01:11:57,080][62782] Overriding arg 'num_workers' with value 1 passed from command line +[2023-06-21 01:11:57,081][62782] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-06-21 01:11:57,082][62782] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-06-21 01:11:57,082][62782] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! 
+[2023-06-21 01:11:57,083][62782] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-06-21 01:11:57,083][62782] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-06-21 01:11:57,084][62782] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-06-21 01:11:57,085][62782] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-06-21 01:11:57,086][62782] Adding new argument 'hf_repository'='mihirdeo16/vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-06-21 01:11:57,087][62782] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-06-21 01:11:57,088][62782] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-06-21 01:11:57,089][62782] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-06-21 01:11:57,089][62782] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-06-21 01:11:57,090][62782] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-06-21 01:11:57,099][62782] RunningMeanStd input shape: (3, 72, 128) +[2023-06-21 01:11:57,101][62782] RunningMeanStd input shape: (1,) +[2023-06-21 01:11:57,110][62782] ConvEncoder: input_channels=3 +[2023-06-21 01:11:57,130][62782] Conv encoder output size: 512 +[2023-06-21 01:11:57,131][62782] Policy head output size: 512 +[2023-06-21 01:11:57,137][62782] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth... +[2023-06-21 01:11:58,662][62782] Num frames 100... +[2023-06-21 01:11:59,389][62782] Num frames 200... +[2023-06-21 01:11:59,866][62782] Avg episode rewards: #0: 2.560, true rewards: #0: 2.560 +[2023-06-21 01:11:59,868][62782] Avg episode reward: 2.560, avg true_objective: 2.560 +[2023-06-21 01:12:00,215][62782] Num frames 300... +[2023-06-21 01:12:01,006][62782] Num frames 400... +[2023-06-21 01:12:01,793][62782] Num frames 500... +[2023-06-21 01:12:02,570][62782] Num frames 600... +[2023-06-21 01:12:03,365][62782] Num frames 700... +[2023-06-21 01:12:03,485][62782] Avg episode rewards: #0: 4.020, true rewards: #0: 3.520 +[2023-06-21 01:12:03,487][62782] Avg episode reward: 4.020, avg true_objective: 3.520 +[2023-06-21 01:12:04,273][62782] Num frames 800... +[2023-06-21 01:12:05,089][62782] Num frames 900... +[2023-06-21 01:12:05,928][62782] Num frames 1000... +[2023-06-21 01:12:06,695][62782] Num frames 1100... +[2023-06-21 01:12:07,467][62782] Num frames 1200... +[2023-06-21 01:12:07,901][62782] Avg episode rewards: #0: 6.160, true rewards: #0: 4.160 +[2023-06-21 01:12:07,902][62782] Avg episode reward: 6.160, avg true_objective: 4.160 +[2023-06-21 01:12:08,312][62782] Num frames 1300... +[2023-06-21 01:12:09,102][62782] Num frames 1400... +[2023-06-21 01:12:09,907][62782] Num frames 1500... +[2023-06-21 01:12:10,724][62782] Num frames 1600... +[2023-06-21 01:12:11,468][62782] Num frames 1700... +[2023-06-21 01:12:12,185][62782] Num frames 1800... +[2023-06-21 01:12:12,912][62782] Num frames 1900... +[2023-06-21 01:12:13,717][62782] Num frames 2000... +[2023-06-21 01:12:14,532][62782] Num frames 2100... +[2023-06-21 01:12:15,315][62782] Num frames 2200... +[2023-06-21 01:12:16,118][62782] Num frames 2300... 
+[2023-06-21 01:12:16,243][62782] Avg episode rewards: #0: 10.760, true rewards: #0: 5.760 +[2023-06-21 01:12:16,245][62782] Avg episode reward: 10.760, avg true_objective: 5.760 +[2023-06-21 01:12:17,006][62782] Num frames 2400... +[2023-06-21 01:12:17,747][62782] Num frames 2500... +[2023-06-21 01:12:18,490][62782] Num frames 2600... +[2023-06-21 01:12:19,233][62782] Avg episode rewards: #0: 9.376, true rewards: #0: 5.376 +[2023-06-21 01:12:19,234][62782] Avg episode reward: 9.376, avg true_objective: 5.376 +[2023-06-21 01:12:19,323][62782] Num frames 2700... +[2023-06-21 01:12:20,069][62782] Num frames 2800... +[2023-06-21 01:12:20,861][62782] Num frames 2900... +[2023-06-21 01:12:21,650][62782] Num frames 3000... +[2023-06-21 01:12:22,504][62782] Num frames 3100... +[2023-06-21 01:12:22,942][62782] Avg episode rewards: #0: 8.727, true rewards: #0: 5.227 +[2023-06-21 01:12:22,945][62782] Avg episode reward: 8.727, avg true_objective: 5.227 +[2023-06-21 01:12:23,529][62782] Num frames 3200... +[2023-06-21 01:12:24,445][62782] Num frames 3300... +[2023-06-21 01:12:25,394][62782] Num frames 3400... +[2023-06-21 01:12:26,338][62782] Num frames 3500... +[2023-06-21 01:12:26,614][62782] Avg episode rewards: #0: 8.029, true rewards: #0: 5.029 +[2023-06-21 01:12:26,617][62782] Avg episode reward: 8.029, avg true_objective: 5.029 +[2023-06-21 01:12:27,245][62782] Num frames 3600... +[2023-06-21 01:12:28,032][62782] Num frames 3700... +[2023-06-21 01:12:28,928][62782] Num frames 3800... +[2023-06-21 01:12:30,201][62782] Num frames 3900... +[2023-06-21 01:12:31,224][62782] Num frames 4000... +[2023-06-21 01:12:32,163][62782] Num frames 4100... +[2023-06-21 01:12:32,910][62782] Num frames 4200... +[2023-06-21 01:12:33,657][62782] Num frames 4300... +[2023-06-21 01:12:34,468][62782] Num frames 4400... +[2023-06-21 01:12:35,280][62782] Num frames 4500... +[2023-06-21 01:12:36,099][62782] Num frames 4600... +[2023-06-21 01:12:36,229][62782] Avg episode rewards: #0: 9.629, true rewards: #0: 5.754 +[2023-06-21 01:12:36,230][62782] Avg episode reward: 9.629, avg true_objective: 5.754 +[2023-06-21 01:12:37,020][62782] Num frames 4700... +[2023-06-21 01:12:37,828][62782] Num frames 4800... +[2023-06-21 01:12:38,618][62782] Num frames 4900... +[2023-06-21 01:12:39,376][62782] Num frames 5000... +[2023-06-21 01:12:40,129][62782] Num frames 5100... +[2023-06-21 01:12:40,628][62782] Avg episode rewards: #0: 9.608, true rewards: #0: 5.719 +[2023-06-21 01:12:40,629][62782] Avg episode reward: 9.608, avg true_objective: 5.719 +[2023-06-21 01:12:41,107][62782] Num frames 5200... +[2023-06-21 01:12:42,009][62782] Num frames 5300... +[2023-06-21 01:12:42,929][62782] Num frames 5400... +[2023-06-21 01:12:43,865][62782] Num frames 5500... +[2023-06-21 01:12:44,767][62782] Num frames 5600... +[2023-06-21 01:12:45,659][62782] Num frames 5700... +[2023-06-21 01:12:46,594][62782] Num frames 5800... +[2023-06-21 01:12:47,369][62782] Avg episode rewards: #0: 10.075, true rewards: #0: 5.875 +[2023-06-21 01:12:47,371][62782] Avg episode reward: 10.075, avg true_objective: 5.875 +[2023-06-21 01:12:55,232][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4! 
+[2023-06-21 01:15:12,180][62782] Loading existing experiment configuration from /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/config.json +[2023-06-21 01:15:12,181][62782] Overriding arg 'num_workers' with value 1 passed from command line +[2023-06-21 01:15:12,181][62782] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-06-21 01:15:12,181][62782] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-06-21 01:15:12,182][62782] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-06-21 01:15:12,182][62782] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-06-21 01:15:12,182][62782] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-06-21 01:15:12,182][62782] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-06-21 01:15:12,183][62782] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-06-21 01:15:12,183][62782] Adding new argument 'hf_repository'='mihirdeo16/vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-06-21 01:15:12,183][62782] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-06-21 01:15:12,184][62782] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-06-21 01:15:12,184][62782] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-06-21 01:15:12,184][62782] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-06-21 01:15:12,185][62782] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-06-21 01:15:12,192][62782] RunningMeanStd input shape: (3, 72, 128) +[2023-06-21 01:15:12,196][62782] RunningMeanStd input shape: (1,) +[2023-06-21 01:15:12,232][62782] ConvEncoder: input_channels=3 +[2023-06-21 01:15:12,256][62782] Conv encoder output size: 512 +[2023-06-21 01:15:12,256][62782] Policy head output size: 512 +[2023-06-21 01:15:12,274][62782] Loading state from checkpoint /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth... +[2023-06-21 01:15:14,407][62782] Num frames 100... +[2023-06-21 01:15:15,193][62782] Num frames 200... +[2023-06-21 01:15:15,999][62782] Num frames 300... +[2023-06-21 01:15:16,784][62782] Num frames 400... +[2023-06-21 01:15:16,996][62782] Avg episode rewards: #0: 6.160, true rewards: #0: 4.160 +[2023-06-21 01:15:16,998][62782] Avg episode reward: 6.160, avg true_objective: 4.160 +[2023-06-21 01:15:17,655][62782] Num frames 500... +[2023-06-21 01:15:18,452][62782] Num frames 600... +[2023-06-21 01:15:19,239][62782] Num frames 700... +[2023-06-21 01:15:19,989][62782] Num frames 800... +[2023-06-21 01:15:20,736][62782] Num frames 900... +[2023-06-21 01:15:21,546][62782] Num frames 1000... +[2023-06-21 01:15:22,311][62782] Num frames 1100... +[2023-06-21 01:15:23,120][62782] Num frames 1200... +[2023-06-21 01:15:23,934][62782] Num frames 1300... +[2023-06-21 01:15:24,122][62782] Avg episode rewards: #0: 12.060, true rewards: #0: 6.560 +[2023-06-21 01:15:24,123][62782] Avg episode reward: 12.060, avg true_objective: 6.560 +[2023-06-21 01:15:24,863][62782] Num frames 1400... +[2023-06-21 01:15:25,757][62782] Num frames 1500... +[2023-06-21 01:15:26,617][62782] Num frames 1600... +[2023-06-21 01:15:27,495][62782] Num frames 1700... 
+[2023-06-21 01:15:28,454][62782] Num frames 1800... +[2023-06-21 01:15:28,770][62782] Avg episode rewards: #0: 10.747, true rewards: #0: 6.080 +[2023-06-21 01:15:28,773][62782] Avg episode reward: 10.747, avg true_objective: 6.080 +[2023-06-21 01:15:29,474][62782] Num frames 1900... +[2023-06-21 01:15:30,427][62782] Num frames 2000... +[2023-06-21 01:15:31,384][62782] Num frames 2100... +[2023-06-21 01:15:32,333][62782] Num frames 2200... +[2023-06-21 01:15:33,250][62782] Num frames 2300... +[2023-06-21 01:15:34,172][62782] Num frames 2400... +[2023-06-21 01:15:35,121][62782] Num frames 2500... +[2023-06-21 01:15:36,071][62782] Num frames 2600... +[2023-06-21 01:15:37,006][62782] Num frames 2700... +[2023-06-21 01:15:37,910][62782] Num frames 2800... +[2023-06-21 01:15:38,836][62782] Num frames 2900... +[2023-06-21 01:15:39,785][62782] Num frames 3000... +[2023-06-21 01:15:40,202][62782] Avg episode rewards: #0: 14.100, true rewards: #0: 7.600 +[2023-06-21 01:15:40,204][62782] Avg episode reward: 14.100, avg true_objective: 7.600 +[2023-06-21 01:15:40,708][62782] Num frames 3100... +[2023-06-21 01:15:41,612][62782] Num frames 3200... +[2023-06-21 01:15:42,567][62782] Num frames 3300... +[2023-06-21 01:15:43,508][62782] Num frames 3400... +[2023-06-21 01:15:44,338][62782] Num frames 3500... +[2023-06-21 01:15:45,255][62782] Num frames 3600... +[2023-06-21 01:15:46,151][62782] Num frames 3700... +[2023-06-21 01:15:46,679][62782] Avg episode rewards: #0: 13.888, true rewards: #0: 7.488 +[2023-06-21 01:15:46,682][62782] Avg episode reward: 13.888, avg true_objective: 7.488 +[2023-06-21 01:15:47,190][62782] Num frames 3800... +[2023-06-21 01:15:48,133][62782] Num frames 3900... +[2023-06-21 01:15:49,082][62782] Num frames 4000... +[2023-06-21 01:15:49,955][62782] Num frames 4100... +[2023-06-21 01:15:50,871][62782] Num frames 4200... +[2023-06-21 01:15:51,719][62782] Num frames 4300... +[2023-06-21 01:15:52,254][62782] Avg episode rewards: #0: 13.087, true rewards: #0: 7.253 +[2023-06-21 01:15:52,256][62782] Avg episode reward: 13.087, avg true_objective: 7.253 +[2023-06-21 01:15:52,649][62782] Num frames 4400... +[2023-06-21 01:15:53,515][62782] Num frames 4500... +[2023-06-21 01:15:54,394][62782] Num frames 4600... +[2023-06-21 01:15:55,283][62782] Num frames 4700... +[2023-06-21 01:15:56,214][62782] Num frames 4800... +[2023-06-21 01:15:57,103][62782] Num frames 4900... +[2023-06-21 01:15:57,989][62782] Num frames 5000... +[2023-06-21 01:15:58,885][62782] Num frames 5100... +[2023-06-21 01:15:59,762][62782] Num frames 5200... +[2023-06-21 01:16:00,556][62782] Avg episode rewards: #0: 14.114, true rewards: #0: 7.543 +[2023-06-21 01:16:00,558][62782] Avg episode reward: 14.114, avg true_objective: 7.543 +[2023-06-21 01:16:00,741][62782] Num frames 5300... +[2023-06-21 01:16:01,649][62782] Num frames 5400... +[2023-06-21 01:16:02,582][62782] Num frames 5500... +[2023-06-21 01:16:03,490][62782] Num frames 5600... +[2023-06-21 01:16:04,383][62782] Num frames 5700... +[2023-06-21 01:16:05,260][62782] Num frames 5800... +[2023-06-21 01:16:05,579][62782] Avg episode rewards: #0: 13.280, true rewards: #0: 7.280 +[2023-06-21 01:16:05,581][62782] Avg episode reward: 13.280, avg true_objective: 7.280 +[2023-06-21 01:16:06,211][62782] Num frames 5900... +[2023-06-21 01:16:06,936][62782] Num frames 6000... +[2023-06-21 01:16:07,685][62782] Num frames 6100... +[2023-06-21 01:16:08,455][62782] Num frames 6200... +[2023-06-21 01:16:09,272][62782] Num frames 6300... 
+[2023-06-21 01:16:09,637][62782] Avg episode rewards: #0: 12.707, true rewards: #0: 7.040 +[2023-06-21 01:16:09,640][62782] Avg episode reward: 12.707, avg true_objective: 7.040 +[2023-06-21 01:16:10,134][62782] Num frames 6400... +[2023-06-21 01:16:10,955][62782] Num frames 6500... +[2023-06-21 01:16:11,757][62782] Num frames 6600... +[2023-06-21 01:16:12,562][62782] Num frames 6700... +[2023-06-21 01:16:13,391][62782] Num frames 6800... +[2023-06-21 01:16:14,116][62782] Num frames 6900... +[2023-06-21 01:16:14,304][62782] Avg episode rewards: #0: 12.412, true rewards: #0: 6.912 +[2023-06-21 01:16:14,307][62782] Avg episode reward: 12.412, avg true_objective: 6.912 +[2023-06-21 01:16:23,345][62782] Replay video saved to /Users/md/Code/python/jubilant-memory/RL/train_dir/default_experiment/replay.mp4!