Spaces:
Runtime error
Runtime error
import argparse | |
import os | |
import os.path as P | |
from copy import deepcopy | |
from functools import partial | |
from glob import glob | |
from multiprocessing import Pool | |
from pathlib import Path | |
import librosa | |
import numpy as np | |
import torchvision | |
class MelSpectrogram:
    """Map a waveform to a mel spectrogram, or approximately invert one.

    Forward (``inverse=False``): take the STFT magnitude of the input, raise
    it to ``spec_power`` and project it onto a mel filterbank.

    Inverse (``inverse=True``): estimate an STFT magnitude from the mel
    spectrogram with ``librosa.feature.inverse.mel_to_stft`` and reconstruct
    a waveform with Griffin-Lim. The reconstruction is an approximation; the
    phase is not stored anywhere in the forward output.

    Args:
        sr: Sampling rate handed to librosa when building/inverting the
            mel filterbank.
        nfft: FFT size used for the STFT and the filterbank.
        fmin: Lowest frequency of the mel filterbank.
        fmax: Highest frequency of the mel filterbank.
        nmels: Number of mel bands in the filterbank.
        hoplen: Hop length passed to the STFT and to Griffin-Lim.
        spec_power: Exponent applied to the STFT magnitude
            (and passed as ``power`` when inverting).
        inverse: Select the inverse direction when True.
    """

    def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, inverse=False):
        self.sr = sr
        self.nfft = nfft
        self.fmin = fmin
        self.fmax = fmax
        self.nmels = nmels
        self.hoplen = hoplen
        self.spec_power = spec_power
        self.inverse = inverse
        # Built once up front so every forward call reuses the same filterbank.
        self.mel_basis = librosa.filters.mel(
            sr=sr, n_fft=nfft, fmin=fmin, fmax=fmax, n_mels=nmels
        )

    def __call__(self, x):
        """Apply the transform to ``x`` in the configured direction.

        Args:
            x: A waveform (forward) or a mel spectrogram (inverse).

        Returns:
            The mel spectrogram (forward) or the reconstructed waveform
            (inverse).
        """
        if self.inverse:
            stft_magnitude = librosa.feature.inverse.mel_to_stft(
                x,
                sr=self.sr,
                n_fft=self.nfft,
                fmin=self.fmin,
                fmax=self.fmax,
                power=self.spec_power,
            )
            return librosa.griffinlim(stft_magnitude, hop_length=self.hoplen)

        stft = librosa.stft(x, n_fft=self.nfft, hop_length=self.hoplen)
        powered = np.abs(stft) ** self.spec_power
        return np.dot(self.mel_basis, powered)
class LowerThresh:
    """Floor every value at ``min_val``.

    Flooring discards information, so the inverse direction is a no-op that
    returns its input unchanged.

    Args:
        min_val: Smallest value allowed in the forward output.
        inverse: When True, ``__call__`` returns its input untouched.
    """

    def __init__(self, min_val, inverse=False):
        self.min_val = min_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``max(min_val, x)`` element-wise, or ``x`` itself when inverse."""
        if self.inverse:
            return x
        return np.maximum(self.min_val, x)
class Add:
    """Add a constant; the inverse direction subtracts it again.

    Args:
        val: Constant offset.
        inverse: When True, ``__call__`` subtracts ``val`` instead of adding.
    """

    def __init__(self, val, inverse=False):
        self.inverse = inverse
        self.val = val

    def __call__(self, x):
        """Return ``x + val`` (forward) or ``x - val`` (inverse)."""
        if self.inverse:
            return x - self.val
        return x + self.val
class Subtract(Add):
    """Subtract a constant; the inverse direction adds it back.

    Shares its constructor with ``Add`` and only flips the arithmetic.

    Args:
        val: Constant offset.
        inverse: When True, ``__call__`` adds ``val`` instead of subtracting.
    """

    def __init__(self, val, inverse=False):
        # Same attributes as the base class, so delegate instead of repeating.
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x - val`` (forward) or ``x + val`` (inverse)."""
        if self.inverse:
            return x + self.val
        return x - self.val
class Multiply:
    """Scale by a constant; the inverse direction divides by it.

    Args:
        val: Scale factor.
        inverse: When True, ``__call__`` divides by ``val`` instead.
    """

    def __init__(self, val, inverse=False) -> None:
        self.val = val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x * val`` (forward) or ``x / val`` (inverse)."""
        if self.inverse:
            return x / self.val
        return x * self.val
class Divide(Multiply):
    """Divide by a constant; the inverse direction multiplies by it.

    Shares its constructor with ``Multiply`` and only flips the arithmetic.

    Args:
        val: Divisor.
        inverse: When True, ``__call__`` multiplies by ``val`` instead.
    """

    def __init__(self, val, inverse=False):
        # Same attributes as the base class, so delegate instead of repeating.
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x / val`` (forward) or ``x * val`` (inverse)."""
        if self.inverse:
            return x * self.val
        return x / self.val
class Log10:
    """Base-10 logarithm; the inverse direction computes ``10 ** x``.

    Args:
        inverse: When True, ``__call__`` exponentiates instead of taking
            the logarithm.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse

    def __call__(self, x):
        """Return ``log10(x)`` (forward) or ``10 ** x`` (inverse)."""
        if self.inverse:
            return 10 ** x
        return np.log10(x)
class Clip:
    """Clamp values to ``[min_val, max_val]``.

    Clamping discards information, so the inverse direction is a no-op that
    returns its input unchanged.

    Args:
        min_val: Lower bound of the forward output.
        max_val: Upper bound of the forward output.
        inverse: When True, ``__call__`` returns its input untouched.
    """

    def __init__(self, min_val, max_val, inverse=False):
        self.min_val = min_val
        self.max_val = max_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` clipped to the configured range, or ``x`` itself when inverse."""
        if self.inverse:
            return x
        return np.clip(x, self.min_val, self.max_val)
class TrimSpec:
    """Keep at most the first ``max_len`` columns of a 2-D array.

    Trimmed columns cannot be recovered, so the inverse direction is a no-op
    that returns its input unchanged.

    Args:
        max_len: Maximum number of columns (second axis) to keep.
        inverse: When True, ``__call__`` returns its input untouched.
    """

    def __init__(self, max_len, inverse=False):
        self.max_len = max_len
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x[:, :max_len]``, or ``x`` itself when inverse."""
        if self.inverse:
            return x
        return x[:, :self.max_len]
class MaxNorm:
    """Divide an array by its own maximum.

    The original maximum is not stored, so the inverse direction is a no-op
    that returns its input unchanged.

    Args:
        inverse: When True, ``__call__`` returns its input untouched.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse
        # Added to the maximum so an all-zero input does not divide by zero.
        self.eps = 1e-10

    def __call__(self, x):
        """Return ``x / (x.max() + eps)``, or ``x`` itself when inverse."""
        if self.inverse:
            return x
        return x / (x.max() + self.eps)
# Forward preprocessing chain configured for sr=16000 input.
# Net effect after the mel step: clip((20 * log10(max(mel, 1e-5)) - 20 + 100) / 100, 0, 1).
TRANSFORMS_16000 = torchvision.transforms.Compose([
    # 80-band magnitude (spec_power=1) mel spectrogram, hop = nfft / 4.
    MelSpectrogram(sr=16000, nfft=1024, fmin=125, fmax=7600, nmels=80, hoplen=1024 // 4, spec_power=1),
    # Floor before the logarithm so Log10 never receives zero.
    LowerThresh(1e-5),
    Log10(),
    Multiply(20),
    Subtract(20),
    Add(100),
    Divide(100),
    # The 1e-5 floor maps to -0.2 after the steps above; clipping pins it to 0.
    Clip(0, 1.0)
    # TrimSpec(860)
])