File size: 1,242 Bytes
9b0dd40
84467ca
9b0dd40
 
 
 
 
 
 
 
 
 
 
84467ca
 
 
 
 
9b0dd40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from typing import Tuple
import subprocess

from torch import no_grad, package
import numpy as np
import os





class PreTrainedPipeline():
    """Text-to-speech pipeline backed by a ``torch.package`` model archive.

    Construction makes sure the ``espeak-ng`` system binary is available
    (best effort) and loads the synthesizer from ``model.pt``; calling the
    instance turns a text string into a float32 waveform.
    """

    def __init__(self, path: str):
        """Prepare the runtime and load the synthesizer.

        Args:
            path (:obj:`str`):
                Directory that contains the packaged model file ``model.pt``.
        """
        # Install espeak-ng (skipped when the binary is already available)
        self._ensure_espeak_ng()

        # Init model
        model_path = os.path.join(path, "model.pt")
        importer = package.PackageImporter(model_path)
        # NOTE(security): load_pickle unpickles data from the archive, which
        # can execute arbitrary code -- only point `path` at trusted models.
        self.synt = importer.load_pickle("tts_models", "model")

        # Extra keyword arguments forwarded to every `tts` call.
        # NOTE(review): "uk" is presumably the Ukrainian speaker/language id
        # of the packaged model -- confirm against the model itself.
        self.tts_kwargs = {
            "speaker_name": "uk",
            "language_name": "uk",
        }

        # Sampling rate reported by the loaded synthesizer.
        self.sampling_rate = self.synt.output_sample_rate

    @staticmethod
    def _ensure_espeak_ng() -> None:
        """Install ``espeak-ng`` through apt-get when it is not on PATH.

        This is best effort: a failure is reported as a warning instead of an
        exception so that construction can still proceed.
        """
        import shutil
        import warnings

        if shutil.which("espeak-ng") is not None:
            # Already installed: avoid a slow, network-bound apt-get run.
            return

        commands = (
            ["apt-get", "update", "-y"],
            ["apt-get", "install", "espeak-ng", "-y"],
        )
        for command in commands:
            try:
                result = subprocess.run(
                    command,
                    universal_newlines=True,
                    start_new_session=True,
                )
            except OSError as err:
                # e.g. apt-get does not exist on this system.
                warnings.warn(f"Could not run {' '.join(command)!r}: {err}")
                return
            if result.returncode != 0:
                # Same short-circuit as `update && install`: stop at the
                # first failing step.
                warnings.warn(
                    f"{' '.join(command)!r} exited with status "
                    f"{result.returncode}; espeak-ng may be unavailable"
                )
                return

    def __call__(self, inputs: str) -> Tuple[np.ndarray, int]:
        """
        Args:
            inputs (:obj:`str`):
                The text to generate audio from
        Return:
            A :obj:`np.ndarray` and a :obj:`int`: The raw waveform as a float32 numpy array, and the sampling rate as an int.
        """
        # Inference only: no gradient tracking is needed.
        with no_grad():
            waveforms = self.synt.tts(inputs, **self.tts_kwargs)
            waveforms = np.array(waveforms, dtype=np.float32)
        return waveforms, self.sampling_rate