mrfakename committed on
Commit
97cf9a5
1 Parent(s): 6a65b07

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. src/f5_tts/infer/utils_infer.py +3 -3
src/f5_tts/infer/utils_infer.py CHANGED
@@ -186,17 +186,17 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=print, device=
186
  non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
187
  non_silent_wave = AudioSegment.silent(duration=0)
188
  for non_silent_seg in non_silent_segs:
189
- if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
190
  show_info("Audio is over 15s, clipping short.")
191
  break
192
  non_silent_wave += non_silent_seg
193
 
194
  # 2. try to find short silence for clipping if 1. failed
195
  if len(non_silent_wave) > 15000:
196
- non_silent_segs = silence.split_on_silence(aseg, min_silence_len=200, silence_thresh=-45, keep_silence=1000)
197
  non_silent_wave = AudioSegment.silent(duration=0)
198
  for non_silent_seg in non_silent_segs:
199
- if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
200
  show_info("Audio is over 15s, clipping short.")
201
  break
202
  non_silent_wave += non_silent_seg
 
186
  non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
187
  non_silent_wave = AudioSegment.silent(duration=0)
188
  for non_silent_seg in non_silent_segs:
189
+ if len(non_silent_wave) > 6000 and len(non_silent_wave + non_silent_seg) > 16000:
190
  show_info("Audio is over 15s, clipping short.")
191
  break
192
  non_silent_wave += non_silent_seg
193
 
194
  # 2. try to find short silence for clipping if 1. failed
195
  if len(non_silent_wave) > 15000:
196
+ non_silent_segs = silence.split_on_silence(aseg, min_silence_len=100, silence_thresh=-40, keep_silence=1000)
197
  non_silent_wave = AudioSegment.silent(duration=0)
198
  for non_silent_seg in non_silent_segs:
199
+ if len(non_silent_wave) > 6000 and len(non_silent_wave + non_silent_seg) > 16000:
200
  show_info("Audio is over 15s, clipping short.")
201
  break
202
  non_silent_wave += non_silent_seg