File size: 1,401 Bytes
f53bfd2 ad172fd f53bfd2 17530a7 afe8005 17530a7 8798153 c601f5d 8798153 17530a7 afe8005 502549a afe8005 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
---
framework: fairseq
task: text-to-speech
tags:
- fairseq
- audio
- text-to-speech
language: en
datasets:
- ljspeech
---
## Example to download fastspeech2 from fairseq
The following should work with fairseq's most up-to-date version in a google colab:
```python
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
import IPython.display as ipd
import torch
model_ensemble, cfg, task = load_model_ensemble_and_task_from_hf_hub(
"facebook/fastspeech2-en-ljspeech", arg_overrides={"vocoder": "griffin_lim", "fp16": False}
)
def tokenize(text):
import g2p_en
tokenized = g2p_en.G2p()(text)
tokenized = [{",": "sp", ";": "sp"}.get(p, p) for p in tokenized]
return " ".join(p for p in tokenized if p.isalnum())
text = "This is a cool demo for speech synthesis, don't you think so?"
tokenized = tokenize(text)
sample = {
"net_input": {
"src_tokens": task.src_dict.encode_line(tokenized).view(1, -1),
"src_lengths": torch.Tensor([len(tokenized.split())]).long(),
"prev_output_tokens": None
},
"target_lengths": None,
"speaker": None,
}
generator = task.build_generator(model_ensemble, cfg)
generation = generator.generate(model_ensemble[0], sample)
waveform = generation[0]["waveform"]
ipd.Audio(waveform, rate=task.sr)
```
See: https://colab.research.google.com/drive/1gvq4Y1urrg9QrQ9031sZIP93LKspIh_X?usp=sharing |