"""Batch inference script.

For every entry in ``clean_names`` the matching file under ``raw/`` is
sliced into segments, each non-silent segment is passed through
``svc_model.infer`` once per speaker in ``spk_list``, and the stitched
result is written to ``./results/``.

The model is loaded and the whole batch runs at import time; this file is
meant to be executed as a script.
"""
import io
import logging
import time
from pathlib import Path

import librosa
import numpy as np
import soundfile

from inference import infer_tool
from inference import slicer
from inference.infer_tool import Svc

# Silence numba's verbose logging.
logging.getLogger('numba').setLevel(logging.WARNING)

# Slicing results from earlier runs, keyed by the hash of the decoded audio.
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")

model_path = "logs/48k/G_280000.pth"
config_path = "configs/config.json"
svc_model = Svc(model_path, config_path)
infer_tool.mkdir(["raw", "results"])

# Input names, resolved relative to ``raw/``; ".wav" is appended when the
# name contains no dot.
clean_names = ["深海少女part1"]
# One value per input, forwarded to ``svc_model.infer`` and embedded in the
# output file name as "<tran>key" (presumably a pitch shift -- confirm
# against ``Svc.infer``).
trans = [0]
# Speakers to run; one output file is written per (input, speaker) pair.
spk_list = ['Umi']
# Threshold handed to ``slicer.cut`` as ``db_thresh``.
slice_db = -50
# Output container: used both as file extension and as soundfile format.
wav_format = 'wav'

# NOTE(review): presumably extends ``trans`` so that it pairs up with every
# entry of ``clean_names`` in the ``zip`` below -- confirm in infer_tool.
infer_tool.fill_a_to_b(trans, clean_names)

for clean_name, tran in zip(clean_names, trans):
    raw_audio_path = f"raw/{clean_name}"
    if "." not in raw_audio_path:
        raw_audio_path += ".wav"
    infer_tool.format_wav(raw_audio_path)
    # Everything below works on the ".wav" sibling of the input path.
    wav_path = Path(raw_audio_path).with_suffix('.wav')

    # The decoded samples are only needed to derive the cache key.
    source_audio, _ = librosa.load(wav_path, mono=True, sr=None)
    wav_hash = infer_tool.get_md5(source_audio)

    if wav_hash in chunks_dict:
        print("load chunks from temp")
        chunks = chunks_dict[wav_hash]["chunks"]
    else:
        chunks = slicer.cut(wav_path, db_thresh=slice_db)
    print(chunks)

    # The entry is rewritten on every run, which also refreshes its timestamp.
    chunks_dict[wav_hash] = {"chunks": chunks, "time": int(time.time())}
    infer_tool.write_temp("inference/chunks_temp.json", chunks_dict)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)

    for spk in spk_list:
        # Per-segment output arrays, joined once at the end instead of
        # growing a Python list one sample at a time.
        segments = []
        for slice_tag, data in audio_data:
            print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
            if slice_tag:
                # Tagged segments are not sent to the model; emit zeros of
                # the equivalent duration at the model's output sample rate.
                print('jump empty segment')
                length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))
                segments.append(np.zeros(length))
            else:
                # The model receives the segment as an in-memory WAV file.
                raw_path = io.BytesIO()
                soundfile.write(raw_path, data, audio_sr, format="wav")
                raw_path.seek(0)
                out_audio, _ = svc_model.infer(spk, tran, raw_path)
                segments.append(out_audio.cpu().numpy())

        # np.concatenate rejects an empty sequence; fall back to an empty
        # array so an input with no segments still yields an (empty) file.
        audio = np.concatenate(segments) if segments else np.zeros(0)

        res_path = f'./results/{clean_name}_{tran}key_{spk}.{wav_format}'
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)