# File size: 1,242 bytes (revision 79728b0)
from typing import Tuple
import subprocess
from torch import no_grad, package
import numpy as np
import os
class PreTrainedPipeline():
    """Text-to-speech pipeline backed by a ``torch.package`` model archive.

    On construction the pipeline makes a best-effort attempt to install the
    ``espeak-ng`` system package, then loads the synthesizer stored as the
    ``model`` resource of the ``tts_models`` package inside ``model.pt``.
    Calling the instance with a string returns the synthesized waveform and
    the sampling rate reported by the model.
    """

    def __init__(self, path: str):
        """Prepare the runtime and load the synthesizer.

        Args:
            path (:obj:`str`):
                Directory that contains the ``model.pt`` package archive.
        """
        # Install espeak-ng (best effort; never raises)
        self._install_espeak()

        # Init model
        model_path = os.path.join(path, "model.pt")
        importer = package.PackageImporter(model_path)
        # NOTE(security): load_pickle unpickles (and therefore executes) code
        # shipped inside the archive, so model.pt must come from a trusted
        # source.
        self.synt = importer.load_pickle("tts_models", "model")

        # Keyword arguments forwarded to every ``synt.tts`` call.
        # NOTE(review): "uk" is presumably the Ukrainian voice/language;
        # confirm against the packaged model.
        self.tts_kwargs = {
            "speaker_name": "uk",
            "language_name": "uk",
        }
        self.sampling_rate = self.synt.output_sample_rate

    @staticmethod
    def _install_espeak() -> None:
        """Install ``espeak-ng`` through ``apt-get`` unless it is already on PATH.

        The commands are passed as argument lists (no shell). Any failure is
        reported with a :class:`RuntimeWarning` instead of an exception so
        that construction of the pipeline keeps its best-effort behaviour.
        """
        # Imported locally so the file's top-level import block is untouched.
        import shutil
        import warnings

        # Skip the slow package-index refresh when the binary already exists.
        # NOTE(review): assumes the shared library ships alongside the binary.
        if shutil.which("espeak-ng") is not None:
            return

        commands = (
            ["apt-get", "update", "-y"],
            ["apt-get", "install", "espeak-ng", "-y"],
        )
        for command in commands:
            try:
                completed = subprocess.run(
                    command,
                    universal_newlines=True,
                    start_new_session=True,
                )
            except OSError as err:
                # e.g. apt-get is not available on this system.
                warnings.warn(
                    f"Could not run {command[0]!r} to install espeak-ng: {err}",
                    RuntimeWarning,
                )
                return
            if completed.returncode != 0:
                # Mirror the original `&&` chaining: stop at the first failure.
                warnings.warn(
                    f"{' '.join(command)!r} exited with status "
                    f"{completed.returncode}; espeak-ng may be unavailable",
                    RuntimeWarning,
                )
                return

    def __call__(self, inputs: str) -> Tuple[np.ndarray, int]:
        """
        Args:
            inputs (:obj:`str`):
                The text to generate audio from
        Return:
            A :obj:`np.ndarray` and a :obj:`int`: The raw waveform as a
            float32 numpy array, and the sampling rate as an int.
        """
        # Inference only: gradient tracking is disabled while synthesizing.
        with no_grad():
            waveforms = self.synt.tts(inputs, **self.tts_kwargs)

        waveforms = np.array(waveforms, dtype=np.float32)
        return waveforms, self.sampling_rate