File size: 2,093 Bytes
6a62ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging

from fairseq.data.audio.frm_text_to_speech_dataset import FrmTextToSpeechDatasetCreator
from fairseq.tasks import register_task
from fairseq.tasks.text_to_speech import TextToSpeechTask


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


@register_task("frm_text_to_speech")
class FrmTextToSpeechTask(TextToSpeechTask):
    @staticmethod
    def add_args(parser):
        TextToSpeechTask.add_args(parser)
        parser.add_argument("--do_chunk", action="store_true", help="train on chunks")
        parser.add_argument("--chunk_bound", default=-1, type=int)
        parser.add_argument("--chunk_init", default=50, type=int)
        parser.add_argument("--chunk_incr", default=5, type=int)
        parser.add_argument("--add_eos", action="store_true")
        parser.add_argument("--dedup", action="store_true")
        parser.add_argument("--ref_fpu", default=-1, type=float)

    def load_dataset(self, split, **unused_kwargs):
        is_train_split = split.startswith("train")
        pre_tokenizer = self.build_tokenizer(self.args)
        bpe_tokenizer = self.build_bpe(self.args)
        self.datasets[split] = FrmTextToSpeechDatasetCreator.from_tsv(
            self.args.data,
            self.data_cfg,
            split,
            self.src_dict,
            pre_tokenizer,
            bpe_tokenizer,
            is_train_split=is_train_split,
            n_frames_per_step=self.args.n_frames_per_step,
            speaker_to_id=self.speaker_to_id,
            do_chunk=self.args.do_chunk,
            chunk_bound=self.args.chunk_bound,
            chunk_init=self.args.chunk_init,
            chunk_incr=self.args.chunk_incr,
            add_eos=self.args.add_eos,
            dedup=self.args.dedup,
            ref_fpu=self.args.ref_fpu,
        )