tts-rvc-autopst / prepare_train_data.py
jonathanjordan21's picture
67809715652a92b22870c50ad30f6ff38e292006aedc75ddbdc828aa856ef68f
c021d8e verified
raw
history blame
3.05 kB
import os
import pickle
import numpy as np
import scipy.fftpack
import soundfile as sf
from utils import pySTFT
from scipy import signal
from librosa.filters import mel
from utils import butter_highpass
import torch
import torch.nn.functional as F
from model_sea import Generator as Model
from hparams_sea import hparams
# Mel filterbank for 16 kHz audio and a 1024-point FFT, transposed so that
# spectrogram frames stored as rows can be right-multiplied by it.
# `sr` and `n_fft` are passed by keyword: librosa >= 0.10 made them
# keyword-only, and older releases accept the keyword form as well.
mel_basis = mel(sr=16000, n_fft=1024, fmin=90, fmax=7600, n_mels=80).T

# Amplitude floor equal to 10 ** (-100 / 20), i.e. -100 dB; applied before
# taking the logarithm so log10 never sees zero.
min_level = np.exp(-100 / 20 * np.log(10))

# Filter coefficients consumed by signal.filtfilt on every utterance.
# NOTE(review): presumably a 5th-order Butterworth high-pass with a 30 Hz
# cutoff at a 16 kHz sample rate -- confirm against utils.butter_highpass.
b, a = butter_highpass(30, 16000, order=5)
# Cepstral normalisation statistics and the matrix used to project the mel
# spectrogram onto cepstral coefficients (unpacked as mean, std, matrix).
# SECURITY: pickle executes arbitrary code while loading; only point these
# paths at asset files you trust.
# The `with` blocks make sure the file handles are closed after loading.
with open('assets/mfcc_stats.pkl', 'rb') as stats_file:
    mfcc_mean, mfcc_std, dctmx = pickle.load(stats_file)

# Lookup from speaker directory name to that speaker's embedding.
with open('assets/spk2emb_82.pkl', 'rb') as emb_file:
    spk2emb = pickle.load(emb_file)
# Input directory: one sub-directory of audio files per speaker.
rootDir = "assets/vctk16-train-wav"
# Output directories, each mirroring the per-speaker layout of rootDir.
targetDir_sp = 'assets/vctk16-train-sp-mel'      # clipped mel spectrograms
targetDir_cep = 'assets/vctk16-train-cep-mel'    # normalised mel cepstra
targetDir_cd = 'assets/vctk16-train-teacher'     # codes produced by G.encode

# Prefer the first CUDA device, but fall back to the CPU so the script still
# runs on machines without a usable GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Encoder model used in eval mode to produce the teacher codes.
G = Model(hparams).eval().to(device)
# Deserialise the checkpoint onto the CPU regardless of where it was saved;
# load_state_dict then copies the weights into the model on `device`.
g_checkpoint = torch.load('assets/sea.ckpt', map_location='cpu')
G.load_state_dict(g_checkpoint['model'], strict=True)
# Walk every speaker directory under rootDir, extract three feature files per
# utterance (mel spectrogram, mel cepstrum, teacher code) and collect one
# metadata entry per speaker, which is finally pickled to train_vctk.meta.
metadata = []
dirName, subdirList, _ = next(os.walk(rootDir))

for subdir in sorted(subdirList):
    print(subdir)

    # exist_ok=True replaces the separate exists() checks and is safe when
    # the directory is already there.
    for targetDir in (targetDir_sp, targetDir_cep, targetDir_cd):
        os.makedirs(os.path.join(targetDir, subdir), exist_ok=True)

    # Entry layout: [speaker id, speaker embedding, relative .npy path, ...]
    submeta = [subdir, spk2emb[subdir]]

    _, _, fileList = next(os.walk(os.path.join(dirName, subdir)))
    for fileName in sorted(fileList):
        # Strip the real extension whatever its length; slicing off a fixed
        # four characters mangles names such as "utt.flac".
        stem = os.path.splitext(fileName)[0]

        # NOTE(review): the sample rate returned here is never checked, while
        # the module-level filter and mel basis are built for 16000 Hz.
        x, fs = sf.read(os.path.join(dirName, subdir, fileName))

        # Append one tiny sample when the length is a multiple of 256.
        # NOTE(review): presumably keeps the frame count produced by pySTFT
        # consistent -- confirm against utils.pySTFT.
        if x.shape[0] % 256 == 0:
            x = np.concatenate((x, np.array([1e-06])), axis=0)

        y = signal.filtfilt(b, a, x)
        D = pySTFT(y * 0.96).T
        D_mel = np.dot(D, mel_basis)
        D_db = 20 * np.log10(np.maximum(min_level, D_mel))

        # mel sp
        S = (D_db + 80) / 100

        # mel cep -- computed from S before it is clipped below
        cc_tmp = S.dot(dctmx)
        cc_norm = (cc_tmp - mfcc_mean) / mfcc_std
        S = np.clip(S, 0, 1)

        # teacher code: only the first 20 cepstral coefficients are encoded,
        # with an all-ones second argument shaped like one coefficient slice.
        cc_torch = torch.from_numpy(
            cc_norm[:, 0:20].astype(np.float32)).unsqueeze(0).to(device)
        with torch.no_grad():
            codes = G.encode(
                cc_torch, torch.ones_like(cc_torch[:, :, 0])).squeeze(0)

        # np.save appends the ".npy" suffix to each of these paths.
        np.save(os.path.join(targetDir_cd, subdir, stem),
                codes.cpu().numpy(), allow_pickle=False)
        np.save(os.path.join(targetDir_sp, subdir, stem),
                S.astype(np.float32), allow_pickle=False)
        np.save(os.path.join(targetDir_cep, subdir, stem),
                cc_norm.astype(np.float32), allow_pickle=False)

        submeta.append(subdir + '/' + stem + '.npy')

    metadata.append(submeta)

with open('./assets/train_vctk.meta', 'wb') as handle:
    pickle.dump(metadata, handle)