File size: 1,712 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import pytest
import time
from itertools import product
import numpy as np
import torch
from ding.rl_utils import ppg_data, ppg_joint_error
# Parameter grid for test_ppg: every (use_value_clip, weight) combination.
use_value_clip_args = [True, False]
# Four weights, each uniform noise shifted up by one (so never below 1).
random_weight = torch.rand(4).add(1)
# Run both without weights (None) and with the random weights.
weight_args = [None, random_weight]
args = list(product(use_value_clip_args, weight_args))
# Due to the numeric stability of this unittest, the inputs are re-sampled and the
# check is rerun when a sporadic RuntimeError occurs during the backward pass.
@pytest.mark.parametrize('use_value_clip, weight', args)
def test_ppg(use_value_clip, weight):
    """
    Check that ``ppg_joint_error`` returns scalar losses that can be backpropagated.

    Random inputs are built, the losses are computed and summed, and ``backward``
    is called on the sum. Gradients must be absent before ``backward`` and present
    on ``logit_new`` and ``value_new`` afterwards.

    Arguments:
        - use_value_clip: forwarded to ``ppg_joint_error`` as ``use_value_clip``.
        - weight: passed as the last field of ``ppg_data`` (``None`` or a tensor).

    Raises:
        - RuntimeError: re-raised from ``backward`` once all retries are used up,
          so a persistent failure is reported instead of silently passing.
    """
    max_retry = 10  # retries allowed after the first attempt
    B, N = 4, 32
    # torch.manual_seed is documented to take an int, while time.time() is a float.
    # The attempt index is added so that every retry draws different data even
    # when all attempts happen within the same second.
    base_seed = int(time.time())
    for attempt in range(max_retry + 1):
        torch.manual_seed(base_seed + attempt)
        logit_new = torch.randn(B, N).add_(0.1).clamp_(0.1, 0.99)
        # Out-of-place ``add`` on purpose: the in-place ``add_`` returns the very
        # same tensor, which would make logit_old an alias of logit_new.
        logit_old = logit_new.add(torch.rand_like(logit_new) * 0.1).clamp_(0.1, 0.99)
        logit_new.requires_grad_(True)
        logit_old.requires_grad_(True)
        action = torch.randint(0, N, size=(B, ))
        value_new = torch.randn(B).requires_grad_(True)
        value_old = value_new + torch.rand_like(value_new) * 0.1
        return_ = torch.randn(B) * 2
        data = ppg_data(logit_new, logit_old, action, value_new, value_old, return_, weight)
        loss = ppg_joint_error(data, use_value_clip=use_value_clip)
        # Every returned loss term must be a 0-dim (scalar) tensor.
        assert all(item.shape == tuple() for item in loss)
        # No gradient may exist before backward has been called.
        assert logit_new.grad is None
        assert value_new.grad is None
        total_loss = sum(loss)
        try:
            total_loss.backward()
        except RuntimeError as e:
            print("[ERROR]: {}".format(e))
            if attempt == max_retry:
                # Out of retries: fail the test rather than leaving the loop quietly.
                raise
            continue
        assert isinstance(logit_new.grad, torch.Tensor)
        assert isinstance(value_new.grad, torch.Tensor)
        break
|