---
tags:
- Pong-PLE-v0
- reinforce
- reinforcement-learning
- custom-implementation
- deep-rl-class
model-index:
- name: pong-policy
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: Pong-PLE-v0
      type: Pong-PLE-v0
    metrics:
    - type: mean_reward
      value: -16.00 +/- 0.00
      name: mean_reward
      verified: false
---

## parameters

```python
pong_hyperparameters = {
    "h_size": 64,
    "n_training_episodes": 20000,
    "n_evaluation_episodes": 10,
    "max_t": 5000,
    "gamma": 0.99,
    "lr": 1e-2,
    "env_id": env_id,
    "state_space": s_size,
    "action_space": a_size,
}
```