|
import os |
|
import torch |
|
import librosa |
|
import soundfile as sf |
|
import gradio as gr |
|
from fairseq import checkpoint_utils |
|
|
|
|
|
# Path to the fine-tuned RVC voice-conversion checkpoint.
MODEL_PATH = "ayumi.pth"

# Faiss feature-retrieval index shipped with the model (v2).
# NOTE(review): this index is never referenced below — presumably intended
# for feature retrieval during conversion; confirm whether it should be loaded.
INDEX_PATH = "added_IVF738_Flat_nprobe_1_ayumi_v2.index"

# Sample rate (Hz) the uploaded audio is resampled to before inference.
TARGET_SAMPLE_RATE = 16000

# Destination file for the converted waveform returned to the Gradio UI.
OUTPUT_AUDIO_PATH = "converted_audio.wav"
|
|
|
|
|
def load_rvc_model(model_path):
    """Load an RVC checkpoint via fairseq and place it on the best device.

    Args:
        model_path: Path to the ``.pth`` checkpoint file.

    Returns:
        The first model of the loaded ensemble, in eval mode, on GPU when
        one is available, otherwise on CPU.
    """
    print("加载 RVC 模型中...")
    # load_model_ensemble_and_task returns (list_of_models, cfg, task);
    # name the list plurally instead of shadowing the single model below.
    models, cfg, task = checkpoint_utils.load_model_ensemble_and_task([model_path])
    # Fix: the original unconditionally called .cuda(), which crashes on
    # CPU-only hosts. Fall back to CPU when no GPU is present.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = models[0].eval().to(device)
    print("模型加载成功")
    return model
|
|
|
|
|
def preprocess_audio(file_path, target_sr=16000):
    """Read an audio file from disk, resampled to ``target_sr``.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Desired sample rate in Hz (default 16000).

    Returns:
        Tuple ``(audio, sr)`` — the mono waveform and its sample rate.
    """
    waveform, sample_rate = librosa.load(file_path, sr=target_sr)
    return waveform, sample_rate
|
|
|
|
|
def convert_audio(model, input_audio, sr):
    """Run the RVC model over a mono waveform.

    Args:
        model: Callable (the loaded RVC model) applied to a ``(1, n)``
            float32 tensor.
        input_audio: 1-D waveform (numpy array or sequence of floats).
        sr: Sample rate of ``input_audio``. Unused here; kept so the
            signature stays compatible with existing callers.

    Returns:
        1-D numpy array containing the converted waveform.
    """
    with torch.no_grad():
        # Fix: the original unconditionally called .cuda(), crashing on
        # CPU-only machines — pick the device at call time instead.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        input_tensor = (
            torch.as_tensor(input_audio, dtype=torch.float32)
            .unsqueeze(0)
            .to(device)
        )
        output = model(input_tensor)
        # Fix: drop the leading batch dimension so downstream sf.write()
        # receives a (n,) mono waveform; a (1, n) array would be
        # misinterpreted by soundfile as n channels of a single frame.
        return output.squeeze(0).cpu().numpy()
|
|
|
|
|
# Load the model once at import time so every Gradio request reuses it.
rvc_model = load_rvc_model(MODEL_PATH)
|
|
|
|
|
def process_audio(file):
    """Gradio handler: convert an uploaded audio file with the RVC model.

    Args:
        file: The uploaded audio — either a filepath string (newer Gradio
            versions) or a tempfile-like object exposing ``.name``.

    Returns:
        Path of the WAV file containing the converted audio.
    """
    # Robustness: newer Gradio passes a plain path string rather than a
    # file object with a .name attribute — accept both.
    input_path = file if isinstance(file, str) else file.name

    input_audio, sr = preprocess_audio(input_path, TARGET_SAMPLE_RATE)
    print(f"加载音频完成,采样率:{sr}")

    converted_audio = convert_audio(rvc_model, input_audio, sr)
    print("音频转换完成")

    # Defensive fix: squeeze any singleton batch dimension so soundfile
    # writes a mono (n,) waveform instead of treating a (1, n) array as
    # n channels of a single frame.
    sf.write(OUTPUT_AUDIO_PATH, converted_audio.squeeze(), sr)
    return OUTPUT_AUDIO_PATH
|
|
|
|
|
# Gradio UI wiring: one audio upload in, one converted audio file out.
# NOTE(review): gr.Audio(type="file") was deprecated and removed in Gradio 4
# (replaced by type="filepath", which passes a path string to the handler) —
# confirm the installed Gradio version; upgrading requires adjusting
# process_audio's use of file.name in the same change.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(label="上传音频", type="file"),
    outputs=gr.Audio(label="转换后的音频"),
    title="RVC 音色转换",
    description="上传任意音频,使用微调的 RVC 模型将其转换为目标音色。"
)
|
|
|
|
|
# Start the Gradio web server only when run as a script (not on import).
if __name__ == "__main__":
    interface.launch()
|
|