Create Agent_class.py
Agent_class.py +49 -0
Agent_class.py
ADDED
@@ -0,0 +1,49 @@
import torch


class ParameterisedPolicy(torch.nn.Module):
    """
    REINFORCE RL agent class. ParameterisedPolicy.act(observation) returns an
    action sampled from a Gaussian policy.

    observation   - a gym state vector
    obs_len       - length of the state vector
    act_space_len - length of the action vector
    """

    def __init__(self, obs_len=8, act_space_len=2):
        super().__init__()
        self.deterministic = False
        self.continuous = True
        self.obs_len = obs_len
        self.act_space_len = act_space_len

        # Shared feature trunk.
        self.lin_1 = torch.nn.Linear(self.obs_len, 256)
        self.rel_1 = torch.nn.ReLU()

        self.lin_2 = torch.nn.Linear(256, 128)
        self.rel_2 = torch.nn.ReLU()

        # Mean head.
        self.lin_3 = torch.nn.Linear(128, self.act_space_len)

        # Standard-deviation head.
        self.lin_4 = torch.nn.Linear(128, self.act_space_len)
        self.elu = torch.nn.ELU()

    def forward(self, x):
        x = self.lin_1(x)
        x = self.rel_1(x)

        x = self.lin_2(x)
        x = self.rel_2(x)

        mu = self.lin_3(x)

        # ELU has range (-1, inf), so the offset keeps sigma strictly positive.
        x = self.lin_4(x)
        sigma = self.elu(x) + 1.000001

        return mu, sigma

    def act(self, observation):
        mus, sigmas = self.forward(torch.tensor(observation, dtype=torch.float32))
        m = torch.distributions.normal.Normal(mus, sigmas)
        action = m.sample().detach().numpy()

        return action