Spaces:
Runtime error
Runtime error
"""Module containing audio helper functions. | |
""" | |
import numpy as np | |
import librosa | |
import config as cfg | |
# Shared random generator for this module, seeded from the config so that
# the values drawn from it are reproducible between runs.
RANDOM = np.random.RandomState(seed=cfg.RANDOM_SEED)
def openAudioFile(path: str, sample_rate=cfg.SAMPLE_RATE, offset=0.0, duration=None):
    """Load an audio file as a mono time series.

    Delegates to ``librosa.load`` with the given settings, always mixing
    down to mono and resampling with the "kaiser_fast" resampler.

    Args:
        path: Path to the audio file.
        sample_rate: The sample rate at which the file should be processed.
        offset: The starting offset passed on to librosa.
        duration: Maximum duration of the loaded content.

    Returns:
        A tuple ``(sig, rate)``: the audio time series and the sampling rate.
    """
    # librosa does the decoding (uses ffmpeg or libav).
    load_options = {
        "sr": sample_rate,
        "offset": offset,
        "duration": duration,
        "mono": True,
        "res_type": "kaiser_fast",
    }
    signal, sampling_rate = librosa.load(path, **load_options)

    return signal, sampling_rate
def saveSignal(sig, fname: str):
    """Write a signal to an audio file.

    The data is written with the "PCM_16" subtype at the sample rate
    configured in ``cfg.SAMPLE_RATE``.

    Args:
        sig: The signal to be saved.
        fname: The file path to write to.
    """
    # Local import: soundfile is only loaded when this function is called.
    import soundfile

    soundfile.write(fname, sig, samplerate=cfg.SAMPLE_RATE, subtype="PCM_16")
def noise(sig, shape, amount=None):
    """Create Gaussian noise scaled relative to a signal.

    Samples are drawn from the module-level ``RANDOM`` generator using a
    normal distribution with mean ``min(sig) * amount`` and standard
    deviation ``max(sig) * amount``.

    Args:
        sig: The original audio signal; its minimum and maximum set the
            noise level (presumably a 1-D mono signal -- confirm with callers).
        shape: Shape of the noise.
        amount: The noise intensity. If None, a random intensity is drawn
            uniformly from the range 0.1 to 0.5.

    Returns:
        A float32 numpy array of noise with the given shape. If the noise
        cannot be generated (e.g. empty signal, or a negative maximum that
        would yield a negative standard deviation), an array of zeros is
        returned instead.
    """
    # Random noise intensity (identity check: `== None` misbehaves for arrays)
    if amount is None:
        amount = RANDOM.uniform(0.1, 0.5)

    # Create Gaussian noise. np.min/np.max reduce in native code instead of
    # iterating the array element by element like the builtins do.
    try:
        noise_sig = RANDOM.normal(np.min(sig) * amount, np.max(sig) * amount, shape)
    except (ValueError, TypeError):
        # Best-effort fallback: silence instead of noise. Only the errors
        # expected from bad input are caught, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        noise_sig = np.zeros(shape)

    return noise_sig.astype("float32")
def splitSignal(sig, rate, seconds, overlap, minlen):
    """Cut a signal into fixed-length, overlapping segments.

    Segments start every ``(seconds - overlap) * rate`` samples. Splitting
    stops at the first segment shorter than ``minlen`` seconds; a segment
    that is long enough but shorter than ``seconds`` is padded with noise
    up to the full segment length.

    Args:
        sig: The original signal to be split.
        rate: The sampling rate.
        seconds: The duration of a segment.
        overlap: The overlapping seconds of segments.
        minlen: Minimum length of a split.

    Returns:
        A list of splits.
    """
    hop = int((seconds - overlap) * rate)
    chunk_len = int(seconds * rate)
    min_len = int(minlen * rate)

    chunks = []

    for start in range(0, len(sig), hop):
        chunk = sig[start : start + chunk_len]
        available = len(chunk)

        # Reached the end of the signal: the remainder is too short to keep.
        if available < min_len:
            break

        # Last usable chunk is incomplete: fill it up with noise.
        if available < chunk_len:
            padding = noise(chunk, chunk_len - available, 0.5)
            chunk = np.hstack((chunk, padding))

        chunks.append(chunk)

    return chunks
def cropCenter(sig, rate, seconds):
    """Fit a signal to a fixed duration around its center.

    A signal longer than ``seconds`` is cropped to its central part; a
    shorter one is padded at the end with noise; a signal that already has
    the requested length is returned unchanged.

    Args:
        sig: The original signal.
        rate: The sampling rate.
        seconds: The length of the signal.

    Returns:
        The cropped or padded signal.
    """
    target_len = int(seconds * rate)
    current_len = len(sig)

    # Too long: keep the middle part.
    if current_len > target_len:
        first = (current_len - target_len) // 2
        return sig[first : first + target_len]

    # Too short: pad with noise.
    if current_len < target_len:
        padding = noise(sig, target_len - current_len, 0.5)
        return np.hstack((sig, padding))

    return sig