Spaces:
Runtime error
Runtime error
kevinwang676
commited on
Commit
•
42a2435
1
Parent(s):
e8a3d79
Update webui.py
Browse files
webui.py
CHANGED
@@ -30,7 +30,9 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
|
30 |
from cosyvoice.cli.cosyvoice import CosyVoice
|
31 |
from cosyvoice.utils.file_utils import load_wav
|
32 |
|
33 |
-
|
|
|
|
|
34 |
format='%(asctime)s %(levelname)s %(message)s')
|
35 |
|
36 |
def generate_seed():
|
@@ -58,15 +60,22 @@ def postprocess(speech, top_db=60, hop_length=220, win_length=440):
|
|
58 |
speech = torch.concat([speech, torch.zeros(1, int(target_sr * 0.2))], dim=1)
|
59 |
return speech
|
60 |
|
61 |
-
inference_mode_list = ['
|
62 |
-
instruct_dict = {'预训练音色': '1. 选择预训练音色\n2
|
63 |
-
'3s极速复刻': '1.
|
64 |
-
'跨语种复刻': '1.
|
65 |
-
'自然语言控制': '1.
|
66 |
def change_instruction(mode_checkbox_group):
|
67 |
return instruct_dict[mode_checkbox_group]
|
68 |
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
if prompt_wav_upload is not None:
|
71 |
prompt_wav = prompt_wav_upload
|
72 |
elif prompt_wav_record is not None:
|
@@ -93,7 +102,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
93 |
if prompt_wav is None:
|
94 |
gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
|
95 |
return (target_sr, default_data)
|
96 |
-
gr.Info('您正在使用跨语种复刻模式,
|
97 |
# if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
|
98 |
if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
|
99 |
if prompt_wav is None:
|
@@ -135,51 +144,48 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
135 |
audio_data = output['tts_speech'].numpy().flatten()
|
136 |
return (target_sr, audio_data)
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
prompt_sr, target_sr = 16000, 22050
|
184 |
-
default_data = np.zeros(target_sr)
|
185 |
-
main()
|
|
|
30 |
from cosyvoice.cli.cosyvoice import CosyVoice
|
31 |
from cosyvoice.utils.file_utils import load_wav
|
32 |
|
33 |
+
import spaces
|
34 |
+
|
35 |
+
logging.basicConfig(level=logging.WARNING,
|
36 |
format='%(asctime)s %(levelname)s %(message)s')
|
37 |
|
38 |
def generate_seed():
|
|
|
60 |
speech = torch.concat([speech, torch.zeros(1, int(target_sr * 0.2))], dim=1)
|
61 |
return speech
|
62 |
|
63 |
+
inference_mode_list = ['3s极速复刻', '跨语种复刻']
|
64 |
+
instruct_dict = {'预训练音色': '1. 选择预训练音色\n2.点击生成音频按钮',
|
65 |
+
'3s极速复刻': '1. 本地上传参考音频,或麦克风录入\n2. 输入参考音频对应的文本以及希望声音复刻的文本\n3.点击“一键开启声音复刻💕”',
|
66 |
+
'跨语种复刻': '1. 本地上传参考音频,或麦克风录入\n2. **无需输入**参考音频对应的文本\n3.点击“一键开启声音复刻💕”',
|
67 |
+
'自然语言控制': '1. 输入instruct文本\n2.点击生成音频按钮'}
|
68 |
def change_instruction(mode_checkbox_group):
|
69 |
return instruct_dict[mode_checkbox_group]
|
70 |
|
71 |
+
@spaces.GPU(duration=70)
|
72 |
+
def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, instruct_text, seed):
|
73 |
+
prompt_wav_record = None
|
74 |
+
tts_text = "".join([item1 for item1 in tts_text.strip().split("\n") if item1 != ""]) + ".。"
|
75 |
+
print(tts_text)
|
76 |
+
prompt_text = "".join([item2 for item2 in prompt_text.strip().split("\n") if item2 != ""])
|
77 |
+
#if len(tts_text)>108:
|
78 |
+
# raise Exception('抱歉!你输入的文本超过了100字符,请您删减文本!')
|
79 |
if prompt_wav_upload is not None:
|
80 |
prompt_wav = prompt_wav_upload
|
81 |
elif prompt_wav_record is not None:
|
|
|
102 |
if prompt_wav is None:
|
103 |
gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
|
104 |
return (target_sr, default_data)
|
105 |
+
gr.Info('您正在使用跨语种复刻模式, 请确保合成文本和参考音频对应的文本为不同语言')
|
106 |
# if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
|
107 |
if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
|
108 |
if prompt_wav is None:
|
|
|
144 |
audio_data = output['tts_speech'].numpy().flatten()
|
145 |
return (target_sr, audio_data)
|
146 |
|
147 |
+
|
148 |
+
cosyvoice = CosyVoice('iic/CosyVoice-300M-Instruct')
|
149 |
+
sft_spk = cosyvoice.list_avaliable_spks()
|
150 |
+
prompt_sr, target_sr = 16000, 22050
|
151 |
+
default_data = np.zeros(target_sr)
|
152 |
+
|
153 |
+
app = gr.Blocks(theme="JohnSmith9982/small_and_pretty")
|
154 |
+
with app:
|
155 |
+
gr.Markdown("# <center>🌊💕🎶 [CosyVoice](https://www.bilibili.com/video/BV1vz421q7ir/) 3秒音频,开启最强声音复刻</center>")
|
156 |
+
gr.Markdown("## <center>🌟 只需3秒参考音频,一键开启超拟人真实声音复刻,支持中日英韩粤语,无需任何训练!</center>")
|
157 |
+
gr.Markdown("### <center>🤗 更多精彩,尽在[滔滔AI](https://www.talktalkai.com/);滔滔AI,为爱滔滔!💕</center>")
|
158 |
+
|
159 |
+
with gr.Row():
|
160 |
+
tts_text = gr.Textbox(label="请填写您希望声音复刻的文本内容", lines=3, info="中文文本建议不超过100个字,英文文本不超过100个单词", placeholder="想说却还没说的,还很多...")
|
161 |
+
mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='请选择声音复刻类型', value=inference_mode_list[0], info="如果声音复刻的文本和参考音频对应的文本是同一种语言,请选择“3s极速复刻”;不同语言,请选“跨语种复刻”", visible=False)
|
162 |
+
instruction_text = gr.Text(label="📔 操作指南", value=instruct_dict[inference_mode_list[0]], scale=0.5, visible=False)
|
163 |
+
sft_dropdown = gr.Dropdown(choices=sft_spk, label='选择预训练音色', value=sft_spk[0], scale=0.25)
|
164 |
+
with gr.Column(scale=0.25):
|
165 |
+
seed_button = gr.Button(value="\U0001F3B2", visible=True)
|
166 |
+
seed = gr.Number(value=0, label="随机推理种子", info="若数值保持不变,则每次生成结果一致", visible=True)
|
167 |
+
|
168 |
+
with gr.Row():
|
169 |
+
prompt_text = gr.Textbox(label="请填写参考音频对应的文本内容", lines=3, placeholder="告诉我参考音频说了些什么吧...", visible=False)
|
170 |
+
prompt_wav_upload = gr.Audio(type='filepath', label='请从本地上传您喜欢的参考音频,注意采样率不低于16kHz,时长不超过30s', visible=False)
|
171 |
+
#prompt_wav_record = gr.Audio(type='filepath', label='通过麦克风录制参考音频,程序会优先使用本地上传的参考音频', visible=False)
|
172 |
+
generate_button = gr.Button("一键开启声音复刻💕", variant="primary")
|
173 |
+
instruct_text = gr.Textbox(label="输入instruct文本", lines=1, placeholder="请输入instruct文本.", value='', visible=False)
|
174 |
+
|
175 |
+
|
176 |
+
audio_output = gr.Audio(label="为您生成的专属音频🎶")
|
177 |
+
|
178 |
+
seed_button.click(fn=generate_seed, inputs=[], outputs=seed)
|
179 |
+
generate_button.click(fn=generate_audio,
|
180 |
+
inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, instruct_text, seed],
|
181 |
+
outputs=audio_output, queue=False)
|
182 |
+
mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
|
183 |
+
gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。请自觉合规使用此程序,程序开发者不负有任何责任。</center>")
|
184 |
+
gr.HTML('''
|
185 |
+
<div class="footer">
|
186 |
+
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
|
187 |
+
</p>
|
188 |
+
</div>
|
189 |
+
''')
|
190 |
+
#app.queue(max_size=40, api_open=False)
|
191 |
+
app.launch(show_error=True)
|
|
|
|
|
|