Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -60,7 +60,7 @@ for v in voicelist:
|
|
60 |
# # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
61 |
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
62 |
if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
|
63 |
-
def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
64 |
if text.strip() == "":
|
65 |
raise gr.Error("You must enter some text")
|
66 |
if len(text) > 50000:
|
@@ -73,7 +73,7 @@ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
|
73 |
audios = []
|
74 |
for t in progress.tqdm(texts):
|
75 |
print(t)
|
76 |
-
audios.append(styletts2importable.inference(t, voices[v], alpha=
|
77 |
return (24000, np.concatenate(audios))
|
78 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
79 |
# if password == os.environ['ACCESS_CODE']:
|
@@ -178,17 +178,21 @@ def ljsynthesize(text, steps,embscale, progress=gr.Progress()):
|
|
178 |
# clbtn = gr.Button("Synthesize", variant="primary")
|
179 |
# claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
180 |
# clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
|
|
|
181 |
with gr.Blocks() as vctk:
|
182 |
with gr.Row():
|
183 |
with gr.Column(scale=1):
|
184 |
inp = gr.Textbox(label="Text", info="Enter the text | テキストを入れてください。短すぎるとひどくなります.", value="あなたがいないと、世界は色褪せて見えます。あなたの笑顔が私の日々を明るく照らしています。あなたがいない日は、まるで冬のように寒く、暗いです.", interactive=True)
|
185 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
|
|
|
|
|
|
|
186 |
multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
|
187 |
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
188 |
with gr.Column(scale=1):
|
189 |
btn = gr.Button("Synthesize", variant="primary")
|
190 |
audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
191 |
-
btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
|
192 |
|
193 |
|
194 |
|
|
|
60 |
# # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
61 |
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
62 |
if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
|
63 |
+
def synthesize(text, voice, lngsteps,embscale,alpha, beta, password, progress=gr.Progress()):
|
64 |
if text.strip() == "":
|
65 |
raise gr.Error("You must enter some text")
|
66 |
if len(text) > 50000:
|
|
|
73 |
audios = []
|
74 |
for t in progress.tqdm(texts):
|
75 |
print(t)
|
76 |
+
audios.append(styletts2importable.inference(t, voices[v], alpha=alpha, beta=beta, diffusion_steps=lngsteps, embedding_scale=embscale))
|
77 |
return (24000, np.concatenate(audios))
|
78 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
79 |
# if password == os.environ['ACCESS_CODE']:
|
|
|
178 |
# clbtn = gr.Button("Synthesize", variant="primary")
|
179 |
# claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
180 |
# clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
|
181 |
+
|
182 |
with gr.Blocks() as vctk:
|
183 |
with gr.Row():
|
184 |
with gr.Column(scale=1):
|
185 |
inp = gr.Textbox(label="Text", info="Enter the text | テキストを入れてください。短すぎるとひどくなります.", value="あなたがいないと、世界は色褪せて見えます。あなたの笑顔が私の日々を明るく照らしています。あなたがいない日は、まるで冬のように寒く、暗いです.", interactive=True)
|
186 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
|
187 |
+
embscale = gr.Slider(minimum=1, maximum=10, value=1.8, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="これを上げたらもっとエモーショナルな音声になります（下げたらその逆）、増やしすぎるとだめになるので、ご注意ください", interactive=True)
|
188 |
+
alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
|
189 |
+
beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
|
190 |
multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
|
191 |
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
192 |
with gr.Column(scale=1):
|
193 |
btn = gr.Button("Synthesize", variant="primary")
|
194 |
audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
195 |
+
btn.click(synthesize, inputs=[inp, voice, multispeakersteps,embscale,alpha,beta], outputs=[audio], concurrency_limit=4)
|
196 |
|
197 |
|
198 |
|