Respair committed on
Commit
5782b8e
โ€ข
1 Parent(s): ce7d002

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -60,7 +60,7 @@ for v in voicelist:
60
  # # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
61
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
62
  if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
63
- def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
64
  if text.strip() == "":
65
  raise gr.Error("You must enter some text")
66
  if len(text) > 50000:
@@ -73,7 +73,7 @@ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
73
  audios = []
74
  for t in progress.tqdm(texts):
75
  print(t)
76
- audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.4, diffusion_steps=lngsteps, embedding_scale=1.5))
77
  return (24000, np.concatenate(audios))
78
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
79
  # if password == os.environ['ACCESS_CODE']:
@@ -178,17 +178,21 @@ def ljsynthesize(text, steps,embscale, progress=gr.Progress()):
178
  # clbtn = gr.Button("Synthesize", variant="primary")
179
  # claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
180
  # clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
 
181
  with gr.Blocks() as vctk:
182
  with gr.Row():
183
  with gr.Column(scale=1):
184
  inp = gr.Textbox(label="Text", info="Enter the text | ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅใ‚Œใฆใใ ใ•ใ„ใ€็Ÿญใ™ใŽใ‚‹ใจใฒใฉใใชใ‚Šใพใ™.", value="ใ‚ใชใŸใŒใ„ใชใ„ใจใ€ไธ–็•Œใฏ่‰ฒ่คชใ›ใฆ่ฆ‹ใˆใพใ™ใ€‚ใ‚ใชใŸใฎ็ฌ‘้ก”ใŒ็งใฎๆ—ฅใ€…ใ‚’ๆ˜Žใ‚‹ใ็…งใ‚‰ใ—ใฆใ„ใพใ™ใ€‚ใ‚ใชใŸใŒใ„ใชใ„ๆ—ฅใฏใ€ใพใ‚‹ใงๅ†ฌใฎใ‚ˆใ†ใซๅฏ’ใใ€ๆš—ใ„ใงใ™.", interactive=True)
185
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
 
 
 
186
  multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
187
  # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
188
  with gr.Column(scale=1):
189
  btn = gr.Button("Synthesize", variant="primary")
190
  audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
191
- btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
192
 
193
 
194
 
 
60
  # # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
61
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
62
  if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
63
+ def synthesize(text, voice, lngsteps,embscale,alpha, beta, password, progress=gr.Progress()):
64
  if text.strip() == "":
65
  raise gr.Error("You must enter some text")
66
  if len(text) > 50000:
 
73
  audios = []
74
  for t in progress.tqdm(texts):
75
  print(t)
76
+ audios.append(styletts2importable.inference(t, voices[v], alpha=alpha, beta=beta, diffusion_steps=lngsteps, embedding_scale=embscale))
77
  return (24000, np.concatenate(audios))
78
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
79
  # if password == os.environ['ACCESS_CODE']:
 
178
  # clbtn = gr.Button("Synthesize", variant="primary")
179
  # claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
180
  # clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
181
+
182
  with gr.Blocks() as vctk:
183
  with gr.Row():
184
  with gr.Column(scale=1):
185
  inp = gr.Textbox(label="Text", info="Enter the text | ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅใ‚Œใฆใใ ใ•ใ„ใ€็Ÿญใ™ใŽใ‚‹ใจใฒใฉใใชใ‚Šใพใ™.", value="ใ‚ใชใŸใŒใ„ใชใ„ใจใ€ไธ–็•Œใฏ่‰ฒ่คชใ›ใฆ่ฆ‹ใˆใพใ™ใ€‚ใ‚ใชใŸใฎ็ฌ‘้ก”ใŒ็งใฎๆ—ฅใ€…ใ‚’ๆ˜Žใ‚‹ใ็…งใ‚‰ใ—ใฆใ„ใพใ™ใ€‚ใ‚ใชใŸใŒใ„ใชใ„ๆ—ฅใฏใ€ใพใ‚‹ใงๅ†ฌใฎใ‚ˆใ†ใซๅฏ’ใใ€ๆš—ใ„ใงใ™.", interactive=True)
186
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
187
+ embscale = gr.Slider(minimum=1, maximum=10, value=1.8, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="ใ“ใ‚Œใ‚’ไธŠใ’ใŸใ‚‰ใ‚‚ใฃใจใ‚จใƒขใƒผใ‚ทใƒงใƒŠใƒซใช้Ÿณๅฃฐใซใชใ‚Šใพใ™๏ผˆไธ‹ใ’ใŸใ‚‰ใใฎ้€†๏ผ‰ใ€ๅข—ใ‚„ใ—ใ™ใŽใ‚‹ใจใ ใ‚ใซใชใ‚‹ใฎใงใ€ใ”ๆณจๆ„ใใ ใ•ใ„", interactive=True)
188
+ alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
189
+ beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
190
  multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
191
  # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
192
  with gr.Column(scale=1):
193
  btn = gr.Button("Synthesize", variant="primary")
194
  audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
195
+ btn.click(synthesize, inputs=[inp, voice, multispeakersteps,embscale,alpha,beta], outputs=[audio], concurrency_limit=4)
196
 
197
 
198