Spaces:
Runtime error
Runtime error
File size: 3,825 Bytes
e4b13fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import argparse
import os
import os.path as P
from copy import deepcopy
from functools import partial
from glob import glob
from multiprocessing import Pool
from pathlib import Path
import librosa
import numpy as np
import torchvision
class MelSpectrogram:
    """Convert between an audio signal and a mel-scaled spectrogram via librosa.

    Forward mode: the input is passed through ``librosa.stft``, the magnitude
    is raised to ``spec_power`` and projected onto a mel filterbank.
    Inverse mode: a mel spectrogram is mapped back to an STFT magnitude with
    ``librosa.feature.inverse.mel_to_stft`` and a signal is estimated from it
    with Griffin-Lim.

    Args:
        sr: sampling rate handed to the librosa mel routines.
        nfft: FFT size (``n_fft``) for the STFT and the mel filterbank.
        fmin: lower frequency bound of the mel filterbank.
        fmax: upper frequency bound of the mel filterbank.
        nmels: number of mel bands in the filterbank.
        hoplen: hop length used by the STFT and by Griffin-Lim.
        spec_power: exponent applied to the STFT magnitude (forward) and
            passed as ``power`` to ``mel_to_stft`` (inverse).
        inverse: when True, ``__call__`` runs the mel -> signal direction.
    """

    def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, inverse=False):
        self.inverse = inverse
        self.sr = sr
        self.nfft = nfft
        self.hoplen = hoplen
        self.spec_power = spec_power
        self.fmin = fmin
        self.fmax = fmax
        self.nmels = nmels
        # The filterbank is built once here and reused by every forward call.
        self.mel_basis = librosa.filters.mel(
            sr=sr, n_fft=nfft, fmin=fmin, fmax=fmax, n_mels=nmels
        )

    def __call__(self, x):
        """Apply the transform to ``x`` in the direction chosen at construction."""
        if not self.inverse:
            stft_magnitude = np.abs(
                librosa.stft(x, n_fft=self.nfft, hop_length=self.hoplen)
            )
            powered = stft_magnitude ** self.spec_power
            return np.dot(self.mel_basis, powered)

        # Inverse direction: mel -> approximate linear spectrogram -> signal.
        linear_spec = librosa.feature.inverse.mel_to_stft(
            x,
            sr=self.sr,
            n_fft=self.nfft,
            fmin=self.fmin,
            fmax=self.fmax,
            power=self.spec_power,
        )
        return librosa.griffinlim(linear_spec, hop_length=self.hoplen)
class LowerThresh:
    """Raise every value below ``min_val`` up to ``min_val``.

    Args:
        min_val: floor applied element-wise in forward mode.
        inverse: when True the transform is a pass-through (the floor
            cannot be undone).
    """

    def __init__(self, min_val, inverse=False):
        self.inverse = inverse
        self.min_val = min_val

    def __call__(self, x):
        """Return ``x`` floored at ``min_val``, or ``x`` itself in inverse mode."""
        return x if self.inverse else np.maximum(self.min_val, x)
class Add:
    """Add a constant to the input; subtract it again in inverse mode.

    Args:
        val: the constant offset.
        inverse: when True, ``__call__`` computes ``x - val`` instead of
            ``x + val``.
    """

    def __init__(self, val, inverse=False):
        self.val = val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` shifted by ``val`` in the configured direction."""
        return x - self.val if self.inverse else x + self.val
class Subtract(Add):
    """Subtract a constant from the input; add it back in inverse mode.

    Args:
        val: the constant offset.
        inverse: when True, ``__call__`` computes ``x + val`` instead of
            ``x - val``.
    """

    def __init__(self, val, inverse=False):
        # Add.__init__ stores the same two attributes (val, inverse).
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x`` shifted by ``val`` in the configured direction."""
        return x + self.val if self.inverse else x - self.val
class Multiply:
    """Scale the input by a constant; divide by it in inverse mode.

    Args:
        val: the scale factor.
        inverse: when True, ``__call__`` computes ``x / val`` instead of
            ``x * val``.
    """

    def __init__(self, val, inverse=False) -> None:
        self.inverse = inverse
        self.val = val

    def __call__(self, x):
        """Return ``x`` scaled by ``val`` in the configured direction."""
        return x / self.val if self.inverse else x * self.val
class Divide(Multiply):
    """Divide the input by a constant; multiply by it in inverse mode.

    Args:
        val: the divisor.
        inverse: when True, ``__call__`` computes ``x * val`` instead of
            ``x / val``.
    """

    def __init__(self, val, inverse=False):
        # Multiply.__init__ stores the same two attributes (val, inverse).
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x`` scaled by ``val`` in the configured direction."""
        return x * self.val if self.inverse else x / self.val
class Log10:
    """Base-10 logarithm; base-10 exponentiation in inverse mode.

    Args:
        inverse: when True, ``__call__`` computes ``10 ** x`` instead of
            ``np.log10(x)``.
    """

    def __init__(self, inverse=False):
        self.inverse = inverse

    def __call__(self, x):
        """Return ``log10(x)``, or ``10 ** x`` in inverse mode."""
        if not self.inverse:
            return np.log10(x)
        return 10 ** x
class Clip:
    """Limit values to the closed interval ``[min_val, max_val]``.

    Args:
        min_val: lower bound passed to ``np.clip``.
        max_val: upper bound passed to ``np.clip``.
        inverse: when True the transform is a pass-through (clipping
            cannot be undone).
    """

    def __init__(self, min_val, max_val, inverse=False):
        self.inverse = inverse
        self.min_val, self.max_val = min_val, max_val

    def __call__(self, x):
        """Return ``x`` clipped to the bounds, or ``x`` itself in inverse mode."""
        return x if self.inverse else np.clip(x, self.min_val, self.max_val)
class TrimSpec:
    """Keep at most the first ``max_len`` entries along the second axis.

    Args:
        max_len: number of leading columns to retain.
        inverse: when True the transform is a pass-through (the dropped
            columns cannot be restored).
    """

    def __init__(self, max_len, inverse=False):
        self.inverse = inverse
        self.max_len = max_len

    def __call__(self, x):
        """Return ``x[:, :max_len]``, or ``x`` itself in inverse mode."""
        if not self.inverse:
            return x[:, :self.max_len]
        return x
class MaxNorm:
    """Divide the input by its maximum value.

    A small constant ``eps`` (1e-10) is added to the maximum before dividing.

    Args:
        inverse: when True the transform is a pass-through (the original
            maximum is not stored, so the scaling cannot be undone).
    """

    def __init__(self, inverse=False):
        self.eps = 1e-10
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x / (x.max() + eps)``, or ``x`` itself in inverse mode."""
        if self.inverse:
            return x
        denominator = x.max() + self.eps
        return x / denominator
# Forward preprocessing pipeline built for sr=16000 input. The transforms are
# listed in the order they are meant to run (torchvision's Compose applies them
# sequentially, feeding each result into the next transform).
TRANSFORMS_16000 = torchvision.transforms.Compose([
    # 80-band mel magnitude spectrogram (spec_power=1): 1024-point FFT, hop of 1024//4 = 256.
    MelSpectrogram(sr=16000, nfft=1024, fmin=125, fmax=7600, nmels=80, hoplen=1024//4, spec_power=1),
    # Floor at 1e-5 so the log10 below never receives values under 1e-5.
    LowerThresh(1e-5),
    Log10(),
    # Together with Log10 this yields 20 * log10(value); the floor makes the minimum -100.
    Multiply(20),
    # The next three steps combine to (v - 20 + 100) / 100, mapping -100 -> -0.2 and 20 -> 1.0.
    Subtract(20),
    Add(100),
    Divide(100),
    # Final values are limited to [0, 1]; anything below 0 after the rescale becomes 0.
    Clip(0, 1.0)
    # Disabled step, kept as in the original:
    # TrimSpec(860)
])
|