File size: 2,266 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import torch


def value_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    r"""
    Overview:
        Squash the magnitude of a value tensor (e.g. action-value targets) with the
        invertible rescaling function
        :math:`h(x) = sign(x)(\sqrt{|x| + 1} - 1) + \epsilon x` .
    Arguments:
        - x: (:obj:`torch.Tensor`) The tensor to be rescaled; the transform is applied element-wise.
        - eps: (:obj:`float`) Coefficient of the additive linear term ``eps * x``, which keeps
            :math:`h^{-1}` Lipschitz continuous.
    Returns:
        - (:obj:`torch.Tensor`) The rescaled tensor, computed element-wise from ``x``.

    .. note::
        Observe and Look Further: Achieving Consistent Performance on Atari
         (https://arxiv.org/abs/1805.11593)
    """
    magnitude = torch.abs(x)
    # Square-root compression of the magnitude; equals 0 at x == 0.
    compressed = torch.sqrt(magnitude + 1) - 1
    # Restore the sign, then add the linear regularization term.
    return torch.sign(x) * compressed + eps * x


def value_inv_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    r"""
    Overview:
        The inverse of the value rescaling function
        :math:`h(x) = sign(x)(\sqrt{|x| + 1} - 1) + \epsilon x` , i.e.
        :math:`h^{-1}(x) = sign(x)\left({\left(\frac{\sqrt{1 + 4\epsilon(|x| + 1 + \epsilon)} - 1}{2\epsilon}\right)}^2 - 1\right)` .
        For ``eps == 0`` the closed form above is ``0 / 0``; in that case the limit
        :math:`h^{-1}(x) = sign(x)((|x| + 1)^2 - 1)` is used instead of returning NaN.
    Arguments:
        - x: (:obj:`torch.Tensor`) The rescaled tensor to be mapped back; applied element-wise.
        - eps: (:obj:`float`) Coefficient of the additive linear term used in the forward transform; \
            it must match the value passed to the forward transform for the round trip to hold.
    Returns:
        - (:obj:`torch.Tensor`) The tensor in the original (un-rescaled) scale.

    .. note::
        Observe and Look Further: Achieving Consistent Performance on Atari
         (https://arxiv.org/abs/1805.11593)
    """
    magnitude = torch.abs(x)
    if isinstance(eps, (int, float)) and eps == 0:
        # Without the linear term, h(x) = sign(x)(sqrt(|x| + 1) - 1), so
        # sqrt(|h^{-1}(x)| + 1) = |x| + 1. The general formula would divide by zero.
        root = magnitude + 1
    else:
        # root == sqrt(|h^{-1}(x)| + 1), obtained by solving the quadratic in the paper.
        root = (torch.sqrt(1 + 4 * eps * (magnitude + 1 + eps)) - 1) / (2 * eps)
    return torch.sign(x) * (root ** 2 - 1)


def symlog(x: torch.Tensor) -> torch.Tensor:
    r"""
    Overview:
        Symmetric logarithm used to normalize targets:
        :math:`symlog(x) = sign(x)\ln(|x| + 1)` .
        The logarithm is evaluated with ``torch.log1p`` so that inputs with very small
        magnitude are not rounded to zero by the ``|x| + 1`` addition.
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be normalized; applied element-wise.
    Returns:
        - (:obj:`torch.Tensor`) Normalized tensor.

    .. note::
        Mastering Diverse Domains through World Models
         (https://arxiv.org/abs/2301.04104)
    """
    # log1p(a) == ln(1 + a) but stays accurate when a is far below machine epsilon.
    return torch.sign(x) * torch.log1p(torch.abs(x))


def inv_symlog(x: torch.Tensor) -> torch.Tensor:
    r"""
    Overview:
        The inverse of the symmetric logarithm (also known as symexp):
        :math:`symexp(x) = sign(x)(\exp(|x|) - 1)` .
        The exponential is evaluated with ``torch.expm1`` so that inputs with very small
        magnitude are not cancelled to zero by the ``exp(|x|) - 1`` subtraction.
    Arguments:
        - x: (:obj:`torch.Tensor`) The input tensor to be unnormalized; applied element-wise.
    Returns:
        - (:obj:`torch.Tensor`) Unnormalized tensor.

    .. note::
        Mastering Diverse Domains through World Models
         (https://arxiv.org/abs/2301.04104)
    """
    # expm1(a) == exp(a) - 1 but avoids catastrophic cancellation when a is close to 0.
    return torch.sign(x) * torch.expm1(torch.abs(x))