Commit
·
3f31acd
1
Parent(s):
810cc65
Update README.md
Browse files
README.md
CHANGED
@@ -36,6 +36,7 @@ Please notice that we encourage you to read our tutorials and learn more about
|
|
36 |
|
37 |
### Using the Vocoder
|
38 |
|
|
|
39 |
```python
|
40 |
import torch
|
41 |
from speechbrain.pretrained import HIFIGAN
|
@@ -46,13 +47,14 @@ mel_specs = torch.rand(2, 80,298)
|
|
46 |
waveforms = hifi_gan.decode_batch(mel_specs)
|
47 |
```
|
48 |
|
|
|
49 |
```python
|
50 |
import torchaudio
|
51 |
from speechbrain.pretrained import HIFIGAN
|
52 |
from speechbrain.lobes.models.FastSpeech2 import mel_spectogram
|
53 |
|
54 |
# Load a pretrained HIFIGAN Vocoder
|
55 |
-
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-libritts-16kHz", savedir="
|
56 |
|
57 |
# Load an audio file (an example file can be found in this repository)
|
58 |
# Ensure that the audio signal is sampled at 16000 Hz; refer to the provided link for a 22050 Hz Vocoder.
|
@@ -89,7 +91,7 @@ waveforms = hifi_gan.decode_batch(spectrogram)
|
|
89 |
# Save the reconstructed audio as a waveform
|
90 |
torchaudio.save('waveform_reconstructed.wav', waveforms.squeeze(1), 16000)
|
91 |
|
92 |
-
# If everything is set up correctly, the original and reconstructed audio should be nearly indistinguishable
|
93 |
|
94 |
```
|
95 |
|
|
|
36 |
|
37 |
### Using the Vocoder
|
38 |
|
39 |
+
- *Basic Usage:*
|
40 |
```python
|
41 |
import torch
|
42 |
from speechbrain.pretrained import HIFIGAN
|
|
|
47 |
waveforms = hifi_gan.decode_batch(mel_specs)
|
48 |
```
|
49 |
|
50 |
+
- *Spectrogram to Waveform Conversion:*
|
51 |
```python
|
52 |
import torchaudio
|
53 |
from speechbrain.pretrained import HIFIGAN
|
54 |
from speechbrain.lobes.models.FastSpeech2 import mel_spectogram
|
55 |
|
56 |
# Load a pretrained HIFIGAN Vocoder
|
57 |
+
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-libritts-16kHz", savedir="vocoder_16khz")
|
58 |
|
59 |
# Load an audio file (an example file can be found in this repository)
|
60 |
# Ensure that the audio signal is sampled at 16000 Hz; refer to the provided link for a 22050 Hz Vocoder.
|
|
|
91 |
# Save the reconstructed audio as a waveform
|
92 |
torchaudio.save('waveform_reconstructed.wav', waveforms.squeeze(1), 16000)
|
93 |
|
94 |
+
# If everything is set up correctly, the original and reconstructed audio should be nearly indistinguishable
|
95 |
|
96 |
```
|
97 |
|