QLWD committed on
Commit
54b3702
·
verified ·
1 Parent(s): c922505

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import librosa
4
+ import soundfile as sf
5
+ import gradio as gr
6
+ from fairseq import checkpoint_utils
7
+
8
+ # 配置路径
9
# Path to the fine-tuned RVC model weights.
MODEL_PATH = "ayumi.pth"

# Path to the RVC index file.
INDEX_PATH = "added_IVF738_Flat_nprobe_1_ayumi_v2.index"

# Sample rate that uploaded audio is resampled to before conversion.
TARGET_SAMPLE_RATE = 16000

# File the converted audio is written to.
OUTPUT_AUDIO_PATH = "converted_audio.wav"
13
+
14
+ # 加载模型
15
def load_rvc_model(model_path):
    """Load a checkpoint through fairseq and return it ready for inference.

    The first model of the loaded ensemble is switched to eval mode and moved
    to the GPU when CUDA is available, otherwise it stays on the CPU so the
    app can still start on CPU-only hosts.

    Args:
        model_path: Path of the checkpoint file to load.

    Returns:
        The loaded model, in eval mode, on the selected device.
    """
    print("加载 RVC 模型中...")
    # NOTE(review): this loader expects a fairseq-format checkpoint; confirm
    # that the RVC ``.pth`` file passed here really is one.
    # Only the model list is needed; the config and task are ignored.
    models, _cfg, _task = checkpoint_utils.load_model_ensemble_and_task([model_path])

    # Do not assume a GPU: an unconditional .cuda() raises on CPU-only hosts.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = models[0].eval().to(device)
    print("模型加载成功")
    return model
21
+
22
+ # 预处理音频
23
def preprocess_audio(file_path, target_sr=16000):
    """Read an audio file with librosa at the requested sample rate.

    Args:
        file_path: Path of the audio file to read.
        target_sr: Sample rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.
    """
    # librosa.load already returns the (audio, sample_rate) pair.
    return librosa.load(file_path, sr=target_sr)
26
+
27
+ # 声音转换
28
def convert_audio(model, input_audio, sr):
    """Run ``model`` on one audio clip and return its output as a NumPy array.

    The clip is turned into a float tensor with a leading batch axis of size
    one and placed on the same device as the model, so the call works on both
    GPU and CPU-only hosts.

    Args:
        model: Callable model; when it exposes ``parameters()``, the device of
            its first parameter decides where the input is placed.
        input_audio: Audio samples accepted by ``torch.tensor``.
        sr: Sample rate of ``input_audio``. Currently unused; kept so the
            call signature stays stable.

    Returns:
        The model output moved to the CPU and converted to a NumPy array.
    """
    try:
        # Follow the model rather than assuming CUDA, so input and weights
        # always live on the same device.
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        # No parameters to inspect: prefer the GPU when one exists.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        input_tensor = torch.tensor(input_audio).unsqueeze(0).float().to(device)
        output_audio = model(input_tensor).cpu().numpy()
    return output_audio
33
+
34
+ # 加载模型
35
# Load the model once at import time so every request reuses the same instance.
rvc_model = load_rvc_model(MODEL_PATH)


def process_audio(file):
    """Convert an uploaded audio file and return the path of the result.

    Args:
        file: The upload handed over by Gradio: either a filesystem path
            (``str`` / ``os.PathLike``) or an object whose ``name`` attribute
            holds the path.

    Returns:
        ``OUTPUT_AUDIO_PATH``, after the converted audio has been written to it.

    Raises:
        ValueError: If no file was supplied.
    """
    if file is None:
        # Fail with a clear message instead of an AttributeError on None.name.
        raise ValueError("请先上传音频文件")

    # Accept both a plain path and a file wrapper exposing ``.name``.
    audio_path = file if isinstance(file, (str, os.PathLike)) else file.name

    # Load the uploaded audio.
    input_audio, sr = preprocess_audio(audio_path, TARGET_SAMPLE_RATE)
    print(f"加载音频完成,采样率:{sr}")

    # Run the RVC model on the audio.
    converted_audio = convert_audio(rvc_model, input_audio, sr)
    print("音频转换完成")

    # convert_audio feeds the model a batch of size one. If the result still
    # carries leading singleton axes, soundfile would treat it as a single
    # frame with many channels, so strip them before writing.
    while getattr(converted_audio, "ndim", 0) > 1 and converted_audio.shape[0] == 1:
        converted_audio = converted_audio[0]

    # NOTE(review): the output is written at the input sample rate; confirm
    # the model does not produce audio at a different rate.
    # NOTE(review): every request writes to the same file, so concurrent
    # requests can overwrite each other's result.
    sf.write(OUTPUT_AUDIO_PATH, converted_audio, sr)
    return OUTPUT_AUDIO_PATH


# Build the Gradio interface. The input uses type="filepath" so the handler
# receives a path on disk; the older "file" type is rejected by current Gradio.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(label="上传音频", type="filepath"),
    outputs=gr.Audio(label="转换后的音频"),
    title="RVC 音色转换",
    description="上传任意音频,使用微调的 RVC 模型将其转换为目标音色。",
)


# Start the app only when run as a script.
if __name__ == "__main__":
    interface.launch()