---
# Training-run configuration with three top-level sections:
#   general_cfg - run-level settings
#   algo_cfg    - algorithm hyperparameters and network layer list
#   env_cfg     - environment selection
# NOTE(review): nesting was reconstructed from a copy of this file that had
# been collapsed onto a single line; confirm it matches the consumer's schema.

general_cfg:
  algo_name: PER_DQN
  collect_traj: false
  device: cuda
  env_name: gym
  load_checkpoint: false
  load_model_step: best
  # NOTE(review): this path names a plain DQN run while algo_name is PER_DQN.
  # Presumably only read when load_checkpoint is true - confirm before
  # enabling checkpoint loading.
  load_path: Train_single_CartPole-v1_DQN_20230515-211721
  max_episode: 100
  max_step: 200
  mode: train
  model_save_fre: 500
  # NOTE(review): mp_backend is "single" while n_workers is 2 - verify the
  # consumer accepts this combination.
  mp_backend: single
  n_learners: 1
  n_workers: 2
  online_eval: true
  online_eval_episode: 10
  seed: 1
  share_buffer: true

algo_cfg:
  batch_size: 64
  buffer_size: 100000
  buffer_type: PER_QUE
  epsilon_decay: 1000
  epsilon_end: 0.01
  epsilon_start: 0.95
  gamma: 0.99
  lr: 0.0001
  # per_* keys presumably tune the prioritized replay buffer selected by
  # buffer_type - TODO confirm against the buffer implementation.
  per_alpha: 0.6
  per_beta: 0.4
  per_beta_annealing: 0.001
  per_epsilon: 0.01
  target_update: 4
  # Two identical entries; each layer_size is a one-element list.
  value_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
    - activation: relu
      layer_size:
        - 256
      layer_type: linear

env_cfg:
  id: CartPole-v1
  # The list below includes the key "ignore_params" itself.
  ignore_params:
    - wrapper
    - ignore_params
  render_mode: null
  wrapper: null