mrfakename committed on
Commit
e1d1d80
1 Parent(s): 9c2897d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -74,7 +74,7 @@ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
74
  # return (24000, np.concatenate(audios))
75
  # else:
76
  # raise gr.Error('Wrong access code')
77
- def clsynthesize(text, voice, vcsteps, progress=gr.Progress()):
78
  # if text.strip() == "":
79
  # raise gr.Error("You must enter some text")
80
  # # if global_phonemizer.phonemize([text]) > 300:
@@ -92,7 +92,7 @@ def clsynthesize(text, voice, vcsteps, progress=gr.Progress()):
92
  texts = txtsplit(text)
93
  audios = []
94
  for t in progress.tqdm(texts):
95
- audios.append(styletts2importable.inference(t, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=vcsteps, embedding_scale=1))
96
  return (24000, np.concatenate(audios))
97
  def ljsynthesize(text, steps, progress=gr.Progress()):
98
  # if text.strip() == "":
@@ -133,10 +133,13 @@ with gr.Blocks() as clone:
133
  clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
134
  clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300, waveform_options={'waveform_progress_color': '#3C82F6'})
135
  vcsteps = gr.Slider(minimum=3, maximum=20, value=20, step=1, label="Diffusion Steps", info="Theoretically, higher should be better quality but slower, but we cannot notice a difference. Try with lower steps first - it is faster", interactive=True)
 
 
 
136
  with gr.Column(scale=1):
137
  clbtn = gr.Button("Synthesize", variant="primary")
138
  claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
139
- clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
140
  # with gr.Blocks() as longText:
141
  # with gr.Row():
142
  # with gr.Column(scale=1):
 
74
  # return (24000, np.concatenate(audios))
75
  # else:
76
  # raise gr.Error('Wrong access code')
77
+ def clsynthesize(text, voice, vcsteps, embscale, alpha, beta, progress=gr.Progress()):
78
  # if text.strip() == "":
79
  # raise gr.Error("You must enter some text")
80
  # # if global_phonemizer.phonemize([text]) > 300:
 
92
  texts = txtsplit(text)
93
  audios = []
94
  for t in progress.tqdm(texts):
95
+ audios.append(styletts2importable.inference(t, styletts2importable.compute_style(voice), alpha=alpha, beta=beta, diffusion_steps=vcsteps, embedding_scale=embscale))
96
  return (24000, np.concatenate(audios))
97
  def ljsynthesize(text, steps, progress=gr.Progress()):
98
  # if text.strip() == "":
 
133
  clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
134
  clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300, waveform_options={'waveform_progress_color': '#3C82F6'})
135
  vcsteps = gr.Slider(minimum=3, maximum=20, value=20, step=1, label="Diffusion Steps", info="Theoretically, higher should be better quality but slower, but we cannot notice a difference. Try with lower steps first - it is faster", interactive=True)
136
+ embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale", info="Defaults to 2", interactive=True)
137
+ alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3", interactive=True)
138
+ beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7", interactive=True)
139
  with gr.Column(scale=1):
140
  clbtn = gr.Button("Synthesize", variant="primary")
141
  claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
142
+ clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
143
  # with gr.Blocks() as longText:
144
  # with gr.Row():
145
  # with gr.Column(scale=1):