# File: Make_An_Audio_inpaint/ldm/data/extract_mel_spectrogram.py
# Uploaded by lmzjms ("Upload 169 files", revision e4b13fd, 3.83 kB).
import argparse
import os
import os.path as P
from copy import deepcopy
from functools import partial
from glob import glob
from multiprocessing import Pool
from pathlib import Path
import librosa
import numpy as np
import torchvision
class MelSpectrogram(object):
    """Callable transform between an audio signal and its mel spectrogram.

    In forward mode (``inverse=False``) the input is run through
    ``librosa.stft``, the magnitude is raised to ``spec_power`` and projected
    onto a mel filter bank. In inverse mode the input is treated as a mel
    spectrogram, mapped back to an STFT magnitude estimate with
    ``librosa.feature.inverse.mel_to_stft`` and turned into audio with
    Griffin-Lim.

    Args:
        sr: Sampling rate handed to librosa.
        nfft: FFT size.
        fmin: Lowest frequency covered by the mel filter bank.
        fmax: Highest frequency covered by the mel filter bank.
        nmels: Number of mel bands in the filter bank.
        hoplen: Hop length used by the STFT and by Griffin-Lim.
        spec_power: Exponent applied to the STFT magnitude.
        inverse: When true, ``__call__`` performs the mel -> audio direction.
    """

    def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, inverse=False):
        self.sr, self.nfft = sr, nfft
        self.fmin, self.fmax = fmin, fmax
        self.nmels, self.hoplen = nmels, hoplen
        self.spec_power, self.inverse = spec_power, inverse
        # The filter bank depends only on the constructor arguments, so it is
        # built once here and reused on every forward call.
        self.mel_basis = librosa.filters.mel(sr=sr, n_fft=nfft, fmin=fmin, fmax=fmax, n_mels=nmels)

    def __call__(self, x):
        """Apply the transform to ``x`` in the configured direction."""
        if not self.inverse:
            magnitude = np.abs(librosa.stft(x, n_fft=self.nfft, hop_length=self.hoplen))
            powered = magnitude ** self.spec_power
            return np.dot(self.mel_basis, powered)

        # Inverse direction: mel spectrogram -> approximate STFT magnitude -> audio.
        stft_estimate = librosa.feature.inverse.mel_to_stft(
            x,
            sr=self.sr,
            n_fft=self.nfft,
            fmin=self.fmin,
            fmax=self.fmax,
            power=self.spec_power,
        )
        return librosa.griffinlim(stft_estimate, hop_length=self.hoplen)
class LowerThresh(object):
    """Raise every element of the input to at least ``min_val``.

    Args:
        min_val: Lower bound applied element-wise in forward mode.
        inverse: When true, the call returns its input unchanged.
    """

    def __init__(self, min_val, inverse=False):
        self.min_val, self.inverse = min_val, inverse

    def __call__(self, x):
        """Return ``x`` floored at ``min_val`` (or ``x`` itself in inverse mode)."""
        # The inverse pass is an identity: the input is returned as-is.
        return x if self.inverse else np.maximum(self.min_val, x)
class Add(object):
    """Shift the input by a constant.

    Args:
        val: Constant added in forward mode and subtracted in inverse mode.
        inverse: Selects the inverse direction when true.
    """

    def __init__(self, val, inverse=False):
        self.val, self.inverse = val, inverse

    def __call__(self, x):
        """Return ``x + val`` in forward mode, ``x - val`` in inverse mode."""
        return x - self.val if self.inverse else x + self.val
class Subtract(Add):
    """Shift the input down by a constant; the mirror image of ``Add``.

    Args:
        val: Constant subtracted in forward mode and added in inverse mode.
        inverse: Selects the inverse direction when true.
    """

    def __init__(self, val, inverse=False):
        # The parent constructor stores exactly the same two attributes.
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x - val`` in forward mode, ``x + val`` in inverse mode."""
        return x + self.val if self.inverse else x - self.val
class Multiply(object):
    """Scale the input by a constant factor.

    Args:
        val: Factor multiplied in forward mode and divided out in inverse mode.
        inverse: Selects the inverse direction when true.
    """

    def __init__(self, val, inverse=False) -> None:
        self.val, self.inverse = val, inverse

    def __call__(self, x):
        """Return ``x * val`` in forward mode, ``x / val`` in inverse mode."""
        return x / self.val if self.inverse else x * self.val
class Divide(Multiply):
    """Divide the input by a constant; the mirror image of ``Multiply``.

    Args:
        val: Divisor used in forward mode and multiplied back in inverse mode.
        inverse: Selects the inverse direction when true.
    """

    def __init__(self, val, inverse=False):
        # The parent constructor stores exactly the same two attributes.
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x / val`` in forward mode, ``x * val`` in inverse mode."""
        return x * self.val if self.inverse else x / self.val
class Log10(object):
    """Base-10 logarithm transform with an exponential inverse.

    Args:
        inverse: When true, the call computes ``10 ** x`` instead of
            ``log10(x)``.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse

    def __call__(self, x):
        """Return ``log10(x)`` in forward mode, ``10 ** x`` in inverse mode."""
        if not self.inverse:
            return np.log10(x)
        return 10 ** x
class Clip(object):
    """Clamp the input to the closed interval ``[min_val, max_val]``.

    Args:
        min_val: Lower bound passed to ``np.clip``.
        max_val: Upper bound passed to ``np.clip``.
        inverse: When true, the call returns its input unchanged.
    """

    def __init__(self, min_val, max_val, inverse=False):
        self.min_val, self.max_val = min_val, max_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` clipped to the bounds (or ``x`` itself in inverse mode)."""
        # The inverse pass is an identity: the input is returned as-is.
        return x if self.inverse else np.clip(x, self.min_val, self.max_val)
class TrimSpec(object):
    """Keep at most the first ``max_len`` entries along the second axis.

    Args:
        max_len: Number of leading columns to retain in forward mode.
        inverse: When true, the call returns its input unchanged.
    """

    def __init__(self, max_len, inverse=False):
        self.max_len, self.inverse = max_len, inverse

    def __call__(self, x):
        """Return ``x[:, :max_len]`` (or ``x`` itself in inverse mode)."""
        if self.inverse:
            # The inverse pass is an identity: the input is returned as-is.
            return x
        keep = slice(None, self.max_len)
        return x[:, keep]
class MaxNorm(object):
    """Divide the input by its own maximum value.

    Args:
        inverse: When true, the call returns its input unchanged.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse
        # Added to the maximum before dividing, so a zero maximum does not
        # produce a division by zero.
        self.eps = 1e-10

    def __call__(self, x):
        """Return ``x / (x.max() + eps)`` (or ``x`` itself in inverse mode)."""
        if self.inverse:
            # The inverse pass is an identity: the input is returned as-is.
            return x
        denominator = x.max() + self.eps
        return x / denominator
# Forward pipeline for 16 kHz audio: magnitude mel spectrogram, then
# clip((20 * log10(max(mel, 1e-5)) - 20 + 100) / 100, 0, 1).
TRANSFORMS_16000 = torchvision.transforms.Compose([
    # 80-band mel filter bank over 125-7600 Hz; the hop is a quarter of the FFT size.
    MelSpectrogram(sr=16000, nfft=1024, fmin=125, fmax=7600, nmels=80, hoplen=256, spec_power=1),
    LowerThresh(min_val=1e-5),  # keeps the log10 argument strictly positive
    Log10(),
    Multiply(val=20),
    Subtract(val=20),
    Add(val=100),
    Divide(val=100),
    Clip(min_val=0, max_val=1.0),
    # TrimSpec(860) is left disabled here.
])