File size: 2,448 Bytes
46a75d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import unittest

import torch as T

from TTS.tts.layers.tacotron.tacotron import CBHG, Decoder, Encoder, Prenet

# pylint: disable=unused-variable


class PrenetTests(unittest.TestCase):
    def test_in_out(self):  # pylint: disable=no-self-use
        layer = Prenet(128, out_features=[256, 128])
        dummy_input = T.rand(4, 128)

        print(layer)
        output = layer(dummy_input)
        assert output.shape[0] == 4
        assert output.shape[1] == 128


class CBHGTests(unittest.TestCase):
    def test_in_out(self):
        # pylint: disable=attribute-defined-outside-init
        layer = self.cbhg = CBHG(
            128,
            K=8,
            conv_bank_features=80,
            conv_projections=[160, 128],
            highway_features=80,
            gru_features=80,
            num_highways=4,
        )
        # B x D x T
        dummy_input = T.rand(4, 128, 8)

        print(layer)
        output = layer(dummy_input)
        assert output.shape[0] == 4
        assert output.shape[1] == 8
        assert output.shape[2] == 160


class DecoderTests(unittest.TestCase):
    @staticmethod
    def test_in_out():
        layer = Decoder(
            in_channels=256,
            frame_channels=80,
            r=2,
            memory_size=4,
            attn_windowing=False,
            attn_norm="sigmoid",
            attn_K=5,
            attn_type="original",
            prenet_type="original",
            prenet_dropout=True,
            forward_attn=True,
            trans_agent=True,
            forward_attn_mask=True,
            location_attn=True,
            separate_stopnet=True,
            max_decoder_steps=50,
        )
        dummy_input = T.rand(4, 8, 256)
        dummy_memory = T.rand(4, 2, 80)

        output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None)

        assert output.shape[0] == 4
        assert output.shape[1] == 80, "size not {}".format(output.shape[1])
        assert output.shape[2] == 2, "size not {}".format(output.shape[2])
        assert stop_tokens.shape[0] == 4


class EncoderTests(unittest.TestCase):
    def test_in_out(self):  # pylint: disable=no-self-use
        layer = Encoder(128)
        dummy_input = T.rand(4, 8, 128)

        print(layer)
        output = layer(dummy_input)
        print(output.shape)
        assert output.shape[0] == 4
        assert output.shape[1] == 8
        assert output.shape[2] == 256  # 128 * 2 BiRNN