Datasculptor's picture
Duplicate from AIGC-Audio/AudioGPT
98f685a
raw
history blame contribute delete
675 Bytes
import librosa
import librosa.filters
import math
import numpy as np
import scipy.io.wavfile
def load_wav(path):
max_length = 32000 * 10
wav = librosa.core.load(path, sr=32000)[0]
if len(wav) > max_length:
audio = wav[0:max_length]
# pad audio to max length, 10s for AudioCaps
if len(wav) < max_length:
# audio = torch.nn.functional.pad(audio, (0, self.max_length - audio.size(1)), 'constant')
wav = np.pad(wav, (0, max_length - len(wav)), 'constant')
wav = wav[...,None]
return wav
def save_wav(wav, path):
wav *= 32767 / max(0.01, np.max(np.abs(wav)))
scipy.io.wavfile.write(path, 32000, wav.astype(np.int16))