GenerSpeech / utils /audio.py
Rongjiehuang's picture
update
222619b
raw
history blame
1.56 kB
import subprocess
import matplotlib
matplotlib.use('Agg')
import librosa
import librosa.filters
import numpy as np
from scipy import signal
from scipy.io import wavfile
def save_wav(wav, path, sr, norm=False):
if norm:
wav = wav / np.abs(wav).max()
wav *= 32767
# proposed by @dsmiller
wavfile.write(path, sr, wav.astype(np.int16))
def get_hop_size(hparams):
hop_size = hparams['hop_size']
if hop_size is None:
assert hparams['frame_shift_ms'] is not None
hop_size = int(hparams['frame_shift_ms'] / 1000 * hparams['audio_sample_rate'])
return hop_size
###########################################################################################
def _stft(y, hparams):
return librosa.stft(y=y, n_fft=hparams['fft_size'], hop_length=get_hop_size(hparams),
win_length=hparams['win_size'], pad_mode='constant')
def _istft(y, hparams):
return librosa.istft(y, hop_length=get_hop_size(hparams), win_length=hparams['win_size'])
def librosa_pad_lr(x, fsize, fshift, pad_sides=1):
'''compute right padding (final frame) or both sides padding (first and final frames)
'''
assert pad_sides in (1, 2)
# return int(fsize // 2)
pad = (x.shape[0] // fshift + 1) * fshift - x.shape[0]
if pad_sides == 1:
return 0, pad
else:
return pad // 2, pad // 2 + pad % 2
# Conversions
def amp_to_db(x):
return 20 * np.log10(np.maximum(1e-5, x))
def normalize(S, hparams):
return (S - hparams['min_level_db']) / -hparams['min_level_db']