File size: 2,266 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import torch
def value_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    r"""
    Overview:
        Squash the scale of action-value targets with the invertible rescaling
        :math: `h(x) = sign(x)(\sqrt{|x|+1} - 1) + \eps * x` .
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be rescaled.
        - eps: (:obj:`float`) Coefficient of the additive linear term, which \
            keeps h^{-1} Lipschitz continuous.
    Returns:
        - (:obj:`torch.Tensor`) Rescaled tensor, computed element-wise.

    .. note::
        Observe and Look Further: Achieving Consistent Performance on Atari
        (https://arxiv.org/abs/1805.11593)
    """
    # Square-root compression of the magnitude, shifted so that h(0) == 0.
    compressed = (x.abs() + 1).sqrt() - 1
    # Restore the sign, then add the linear regularization term.
    signed = x.sign() * compressed
    return signed + eps * x
def value_inv_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    r"""
    Overview:
        The inverse form of value rescale, i.e. the inverse of
        :math: `h(x) = sign(x)(\sqrt{|x|+1} - 1) + \eps * x` .
        :math: `h^{-1}(x) = sign(x)({(\frac{\sqrt{1+4\eps(|x|+1+\eps)}-1}{2\eps})}^2-1)` .
        The inner quotient is evaluated in the algebraically equivalent rationalized form
        :math: `\frac{2(|x|+1+\eps)}{\sqrt{1+4\eps(|x|+1+\eps)}+1}` ,
        which contains no division by ``eps``. This keeps the result finite for ``eps=0``
        (where it reduces to :math: `sign(x)((|x|+1)^2-1)` ) and avoids the cancellation
        in ``sqrt(...) - 1`` when ``eps`` is small.
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be unnormalized.
        - eps: (:obj:`float`) The coefficient of the additive regularization term \
            to ensure h^{-1} is Lipschitz continuous
    Returns:
        - (:obj:`torch.Tensor`) Unnormalized tensor.
    """
    # shifted = |x| + 1 + eps appears both in the numerator and under the square root.
    shifted = torch.abs(x) + 1 + eps
    # Rationalized quotient: the denominator is always >= 1, so no 0/0 at eps == 0.
    root = 2 * shifted / (torch.sqrt(1 + 4 * eps * shifted) + 1)
    return torch.sign(x) * (root ** 2 - 1)
def symlog(x: torch.Tensor) -> torch.Tensor:
    r"""
    Overview:
        A function to normalize the targets.
        :math: `symlog(x) = sign(x)\ln(|x|+1)` .
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be normalized.
    Returns:
        - (:obj:`torch.Tensor`) Normalized tensor.

    .. note::
        Mastering Diverse Domains through World Models
        (https://arxiv.org/abs/2301.04104)
    """
    # log1p(|x|) equals log(|x| + 1) mathematically but stays accurate for tiny |x|,
    # where |x| + 1 would round to exactly 1 and the plain log would return 0.
    return torch.sign(x) * torch.log1p(torch.abs(x))
def inv_symlog(x: torch.Tensor) -> torch.Tensor:
    r"""
    Overview:
        The inverse form of symlog.
        :math: `symexp(x) = sign(x)(\exp{|x|}-1)` .
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be unnormalized.
    Returns:
        - (:obj:`torch.Tensor`) Unnormalized tensor.
    """
    # expm1(|x|) equals exp(|x|) - 1 mathematically but avoids the cancellation that
    # makes the plain form return 0 for tiny |x|.
    return torch.sign(x) * torch.expm1(torch.abs(x))
|