"""Module containing audio helper functions. """ import numpy as np import librosa import config as cfg RANDOM = np.random.RandomState(cfg.RANDOM_SEED) def openAudioFile(path: str, sample_rate=cfg.SAMPLE_RATE, offset=0.0, duration=None): """Open an audio file. Opens an audio file with librosa and the given settings. Args: path: Path to the audio file. sample_rate: The sample rate at which the file should be processed. offset: The starting offset. duration: Maximum duration of the loaded content. Returns: Returns the audio time series and the sampling rate. """ # Open file with librosa (uses ffmpeg or libav) sig, rate = librosa.load(path, sr=sample_rate, offset=offset, duration=duration, mono=True, res_type="kaiser_fast") return sig, rate def saveSignal(sig, fname: str): """Saves a signal to file. Args: sig: The signal to be saved. fname: The file path. """ import soundfile as sf sf.write(fname, sig, cfg.SAMPLE_RATE, "PCM_16") def noise(sig, shape, amount=None): """Creates noise. Creates a noise vector with the given shape. Args: sig: The original audio signal. shape: Shape of the noise. amount: The noise intensity. Returns: An numpy array of noise with the given shape. """ # Random noise intensity if amount == None: amount = RANDOM.uniform(0.1, 0.5) # Create Gaussian noise try: noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, shape) except: noise = np.zeros(shape) return noise.astype("float32") def splitSignal(sig, rate, seconds, overlap, minlen): """Split signal with overlap. Args: sig: The original signal to be split. rate: The sampling rate. seconds: The duration of a segment. overlap: The overlapping seconds of segments. minlen: Minimum length of a split. Returns: A list of splits. """ sig_splits = [] for i in range(0, len(sig), int((seconds - overlap) * rate)): split = sig[i : i + int(seconds * rate)] # End of signal? if len(split) < int(minlen * rate): break # Signal chunk too short? if len(split) < int(rate * seconds): split = np.hstack((split, noise(split, (int(rate * seconds) - len(split)), 0.5))) sig_splits.append(split) return sig_splits def cropCenter(sig, rate, seconds): """Crop signal to center. Args: sig: The original signal. rate: The sampling rate. seconds: The length of the signal. """ if len(sig) > int(seconds * rate): start = int((len(sig) - int(seconds * rate)) / 2) end = start + int(seconds * rate) sig = sig[start:end] # Pad with noise elif len(sig) < int(seconds * rate): sig = np.hstack((sig, noise(sig, (int(seconds * rate) - len(sig)), 0.5))) return sig