Spaces:
Build error
Build error
import json | |
import os | |
import shutil | |
from functools import reduce | |
from pathlib import Path | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import yaml | |
from pylab import xticks, np | |
from tqdm import tqdm | |
from modules.vocoders.nsf_hifigan import NsfHifiGAN | |
from preprocessing.process_pipeline import get_pitch_parselmouth, get_pitch_crepe | |
from utils.hparams import set_hparams, hparams | |
# Names of the twelve pitch classes of an octave, indexed by (pitch number % 12).
head_list = "C C# D D# E F F# G G# A A# B".split()
def compare_pitch(f0_static_dict, pitch_time_temp, trans_key=0):
    """Return a weighted overlap between two pitch tables after a key shift.

    Every key of ``pitch_time_temp`` is shifted by ``trans_key`` and converted
    with ``str`` before being looked up in ``f0_static_dict``; shifted keys
    that are absent there contribute nothing.

    Args:
        f0_static_dict: Mapping with string keys and numeric values.
        pitch_time_temp: Mapping with numeric keys and numeric values.
        trans_key: Offset added to each key of ``pitch_time_temp``.

    Returns:
        The sum of ``value * f0_static_dict[str(key + trans_key)]`` over all
        matching entries, or ``0`` when nothing matches.
    """
    score = 0
    for pitch, duration in pitch_time_temp.items():
        shifted = str(pitch + trans_key)
        if shifted in f0_static_dict:
            score += duration * f0_static_dict[shifted]
    return score
def f0_to_pitch(ff):
    """Convert a frequency in Hz to the nearest MIDI-style pitch number.

    Uses the mapping in which 440 Hz corresponds to pitch 69 and every
    doubling of the frequency adds 12.

    Args:
        ff: Frequency value; callers in this file only pass values > 0.

    Returns:
        The pitch number rounded to zero decimals (still a float-like value,
        since ``round`` is called with an explicit ``ndigits``).
    """
    return round(69 + 12 * np.log2(ff / 440), 0)
def pitch_to_name(pitch):
    """Format a pitch number as a note name followed by its octave.

    The note letter is taken from ``head_list`` using ``pitch % 12`` and the
    octave is ``int(pitch / 12) - 1``, so pitch 60 yields ``"C4"``.

    Args:
        pitch: Numeric pitch number (int or float).

    Returns:
        A string such as ``"A4"`` or ``"C#3"``.
    """
    note = head_list[int(pitch % 12)]
    octave = int(pitch / 12) - 1
    return f"{note}{octave}"
def get_f0(audio_path, crepe=False):
    """Extract the f0 track of one audio file.

    The file is first passed to ``NsfHifiGAN.wav2spec``; the two values it
    returns are then handed, together with the global ``hparams``, to either
    the crepe or the parselmouth pitch extractor.

    Args:
        audio_path: Path of the audio file to analyse.
        crepe: Use ``get_pitch_crepe`` when true, otherwise
            ``get_pitch_parselmouth``.

    Returns:
        The first element of the extractor's two-element result (the f0
        track); the second element is discarded.
    """
    wav, mel = NsfHifiGAN.wav2spec(audio_path)
    extractor = get_pitch_crepe if crepe else get_pitch_parselmouth
    f0, _ = extractor(wav, mel, hparams)
    return f0
def merge_f0_dict(dict_list):
    """Merge several count dictionaries by summing the values of equal keys.

    Unlike a ``reduce`` without an initial value, this accepts an empty input
    and returns an empty dict for it. The inputs are never modified; the
    result is always a fresh dict.

    Args:
        dict_list: Any iterable of mappings (a list, a generator, a
            ``dict.values()`` view, ...). It is consumed in a single pass.

    Returns:
        A dict mapping every key seen in any input to the sum of its values.
    """
    merged = {}
    for counts in dict_list:
        for key, value in counts.items():
            # Missing keys start from 0, matching the pairwise d.get(key, 0) sum.
            merged[key] = merged.get(key, 0) + value
    return merged
def collect_f0(f0):
    """Count how many voiced frames fall on each pitch number.

    Only entries with ``f0 > 0`` are considered; each one is converted with
    ``f0_to_pitch`` and tallied.

    Args:
        f0: Array supporting boolean-mask indexing (``f0[f0 > 0]``).

    Returns:
        A dict mapping pitch number to its number of occurrences.
    """
    voiced = f0[f0 > 0]
    histogram = {}
    for freq in voiced:
        note = f0_to_pitch(freq)
        histogram[note] = histogram.get(note, 0) + 1
    return histogram
def static_f0_time(f0):
    """Compute the time spent on each pitch, in seconds.

    Frame counts per pitch are converted to seconds using
    ``hparams['hop_size'] / hparams['audio_sample_rate']`` and rounded to two
    decimals.

    Args:
        f0: Either a single f0 array, or a dict whose values are f0 arrays;
            for a dict the per-entry counts are merged first.

    Returns:
        A dict mapping pitch number to duration, with keys inserted in
        ascending order.
    """
    if isinstance(f0, dict):
        frame_counts = merge_f0_dict(collect_f0(track) for track in f0.values())
    else:
        frame_counts = collect_f0(f0)
    return {
        pitch: round(frame_counts[pitch] * hparams['hop_size'] / hparams['audio_sample_rate'], 2)
        for pitch in sorted(frame_counts.keys())
    }
def get_end_file(dir_path, end):
    """Recursively list files below ``dir_path`` whose name ends with ``end``.

    Files and directories whose name starts with a dot are skipped; hidden
    directories are not descended into.

    Args:
        dir_path: Root directory to walk.
        end: Suffix passed to ``str.endswith``.

    Returns:
        A list of matching paths with backslashes replaced by forward
        slashes, in ``os.walk`` order.
    """
    matches = []
    for root, dirs, files in os.walk(dir_path):
        # Prune in place so os.walk does not enter hidden directories.
        dirs[:] = [name for name in dirs if not name.startswith('.')]
        for name in files:
            if name.startswith('.') or not name.endswith(end):
                continue
            matches.append(os.path.join(root, name).replace("\\", "/"))
    return matches
if __name__ == "__main__":
    # Add an "f0_static" entry (pitch-range statistics) to the model's config file.
    config_path = "F:/sovits/diff-svc-main/checkpoints/aquapre/config.yaml"
    hparams = set_hparams(config=config_path, exp_name='', infer=True, reset=True, hparams_str='', print_hparams=False)
    f0_dict = {}
    # Collect every wav file below the batch folder.
    batch_dir = "F:/sovits/diff-svc-main/batch/aquapre"
    wav_paths = get_end_file(batch_dir, "wav")
    if not wav_paths:
        # Without audio there is nothing to analyse; stop before touching the config.
        raise SystemExit(f"No wav files found under {batch_dir}")
    # Extract f0 with parselmouth.
    with tqdm(total=len(wav_paths)) as p_bar:
        p_bar.set_description('Processing')
        for wav_path in wav_paths:
            f0_dict[wav_path] = get_f0(wav_path, crepe=False)
            p_bar.update(1)
    pitch_time = static_f0_time(f0_dict)
    total_time = round(sum(pitch_time.values()), 2)
    print(f"total time: {total_time}s")

    # The saved statistics carry the total under an extra key. It is kept out
    # of pitch_time so that the plot below only sees numeric pitch keys.
    f0_static = {**pitch_time, "total_time": total_time}

    # Back up the config next to the original; with_name avoids a hard-coded
    # path separator.
    config_file = Path(config_path)
    shutil.copy(config_path, config_file.with_name(f"back_{config_file.name}"))
    with open(config_path, encoding='utf-8') as f:
        _hparams = yaml.safe_load(f)
    _hparams['f0_static'] = json.dumps(f0_static)
    with open(config_path, 'w', encoding='utf-8') as f:
        yaml.safe_dump(_hparams, f)
    print("原config文件已在原目录建立备份:back_config.yaml")
    print("音域统计已保存至config文件,此模型可使用自动变调功能")

    matplotlib.use('TkAgg')
    plt.title("数据集音域统计", fontproperties='SimHei')
    plt.xlabel("音高", fontproperties='SimHei')
    plt.ylabel("时长(s)", fontproperties='SimHei')
    # One tick per integer pitch so each label sits exactly on the pitch it names.
    tick_pitches = list(range(36, 96))
    xticks(tick_pitches, [pitch_to_name(i) for i in tick_pitches])
    plt.plot(list(pitch_time.keys()), list(pitch_time.values()), color='dodgerblue')
    plt.show()