Spaces:
Sleeping
Sleeping
hritiksdlccorp
commited on
Commit
•
71aa8ed
1
Parent(s):
82837f2
Update app.py
Browse files
app.py
CHANGED
@@ -187,13 +187,8 @@ def get_spepc(hps, filename):
|
|
187 |
|
188 |
|
189 |
dict_language = {
|
190 |
-
("
|
191 |
-
|
192 |
-
("日文1"): "all_ja",#全部按日文识别
|
193 |
-
("中文"): "zh",#按中英混合识别####不变
|
194 |
-
("日本語"): "ja",#按日英混合识别####不变
|
195 |
-
("混合"): "auto",#多语种启动切分识别语种
|
196 |
-
}
|
197 |
|
198 |
|
199 |
def splite_en_inf(sentence, language):
|
@@ -361,7 +356,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|
361 |
if not duration(ref_wav_path):
|
362 |
return None
|
363 |
if text == '':
|
364 |
-
wprint("Please enter text to generate
|
365 |
return None
|
366 |
t0 = ttime()
|
367 |
startTime=timer()
|
@@ -382,8 +377,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|
382 |
if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
|
383 |
text = text.strip("\n")
|
384 |
if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
|
385 |
-
|
386 |
-
#print(("📝实际输入的目标文本:"), text)
|
387 |
zero_wav = np.zeros(
|
388 |
int(hps.data.sampling_rate * 0.3),
|
389 |
dtype=np.float16 if is_half == True else np.float32,
|
@@ -425,7 +419,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|
425 |
text = cut5(text)
|
426 |
while "\n\n" in text:
|
427 |
text = text.replace("\n\n", "\n")
|
428 |
-
print(
|
429 |
texts = text.split("\n")
|
430 |
texts = merge_short_text_in_array(texts, 5)
|
431 |
audio_opt = []
|
@@ -435,12 +429,12 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|
435 |
if (len(text.strip()) == 0):
|
436 |
continue
|
437 |
if (text[-1] not in splits): text += "。" if text_language != "en" else "."
|
438 |
-
print(
|
439 |
phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
|
440 |
try:
|
441 |
bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
|
442 |
except RuntimeError as e:
|
443 |
-
wprint(f"The input text does not match the language
|
444 |
return None
|
445 |
bert = torch.cat([bert1, bert2], 1)
|
446 |
|
@@ -481,7 +475,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|
481 |
.numpy()[0, 0]
|
482 |
)
|
483 |
except RuntimeError as e:
|
484 |
-
wprint(f"The input text does not match the language
|
485 |
return None
|
486 |
|
487 |
max_audio=np.abs(audio).max()
|
@@ -583,9 +577,7 @@ def cut5(inp):
|
|
583 |
|
584 |
|
585 |
def custom_sort_key(s):
|
586 |
-
# 使用正则表达式提取字符串中的数字部分和非数字部分
|
587 |
parts = re.split('(\d+)', s)
|
588 |
-
# 将数字部分转换为整数,非数字部分保持不变
|
589 |
parts = [int(part) if part.isdigit() else part for part in parts]
|
590 |
return parts
|
591 |
|
@@ -602,7 +594,7 @@ def wprint(text):
|
|
602 |
def lang_detector(text):
|
603 |
min_chars = 5
|
604 |
if len(text) < min_chars:
|
605 |
-
return "Input text too short
|
606 |
try:
|
607 |
detector = Detector(text).language
|
608 |
lang_info = str(detector)
|
@@ -651,12 +643,12 @@ def trim_text(text,language):
|
|
651 |
|
652 |
def duration(audio_file_path):
|
653 |
if not audio_file_path:
|
654 |
-
wprint("Failed to obtain uploaded audio
|
655 |
return False
|
656 |
try:
|
657 |
audio_duration = librosa.get_duration(filename=audio_file_path)
|
658 |
if not 3 < audio_duration < 10:
|
659 |
-
wprint("The audio length must be between 3~10 seconds
|
660 |
return False
|
661 |
return True
|
662 |
except FileNotFoundError:
|
@@ -715,7 +707,7 @@ def clone_voice(user_voice,user_text,user_lang):
|
|
715 |
if not duration(user_voice):
|
716 |
return None
|
717 |
if user_text == '':
|
718 |
-
wprint("Please enter text to generate
|
719 |
return None
|
720 |
user_text=trim_text(user_text,user_lang)
|
721 |
time1=timer()
|
@@ -760,7 +752,6 @@ for model_name, model_info in models.items():
|
|
760 |
|
761 |
##########GRADIO###########
|
762 |
|
763 |
-
|
764 |
with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
|
765 |
gr.HTML('''
|
766 |
<h1 style="font-size: 25px;">Text-to-Speech Generator</h1>
|
|
|
187 |
|
188 |
|
189 |
dict_language = {
|
190 |
+
("English"): "en"
|
191 |
+
}
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
|
194 |
def splite_en_inf(sentence, language):
|
|
|
356 |
if not duration(ref_wav_path):
|
357 |
return None
|
358 |
if text == '':
|
359 |
+
wprint("Please enter text to generate")
|
360 |
return None
|
361 |
t0 = ttime()
|
362 |
startTime=timer()
|
|
|
377 |
if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
|
378 |
text = text.strip("\n")
|
379 |
if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
|
380 |
+
|
|
|
381 |
zero_wav = np.zeros(
|
382 |
int(hps.data.sampling_rate * 0.3),
|
383 |
dtype=np.float16 if is_half == True else np.float32,
|
|
|
419 |
text = cut5(text)
|
420 |
while "\n\n" in text:
|
421 |
text = text.replace("\n\n", "\n")
|
422 |
+
print(text)
|
423 |
texts = text.split("\n")
|
424 |
texts = merge_short_text_in_array(texts, 5)
|
425 |
audio_opt = []
|
|
|
429 |
if (len(text.strip()) == 0):
|
430 |
continue
|
431 |
if (text[-1] not in splits): text += "。" if text_language != "en" else "."
|
432 |
+
print(text)
|
433 |
phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
|
434 |
try:
|
435 |
bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
|
436 |
except RuntimeError as e:
|
437 |
+
wprint(f"The input text does not match the language: {e}")
|
438 |
return None
|
439 |
bert = torch.cat([bert1, bert2], 1)
|
440 |
|
|
|
475 |
.numpy()[0, 0]
|
476 |
)
|
477 |
except RuntimeError as e:
|
478 |
+
wprint(f"The input text does not match the language: {e}")
|
479 |
return None
|
480 |
|
481 |
max_audio=np.abs(audio).max()
|
|
|
577 |
|
578 |
|
579 |
def custom_sort_key(s):
|
|
|
580 |
parts = re.split('(\d+)', s)
|
|
|
581 |
parts = [int(part) if part.isdigit() else part for part in parts]
|
582 |
return parts
|
583 |
|
|
|
594 |
def lang_detector(text):
|
595 |
min_chars = 5
|
596 |
if len(text) < min_chars:
|
597 |
+
return "Input text too short"
|
598 |
try:
|
599 |
detector = Detector(text).language
|
600 |
lang_info = str(detector)
|
|
|
643 |
|
644 |
def duration(audio_file_path):
|
645 |
if not audio_file_path:
|
646 |
+
wprint("Failed to obtain uploaded audio")
|
647 |
return False
|
648 |
try:
|
649 |
audio_duration = librosa.get_duration(filename=audio_file_path)
|
650 |
if not 3 < audio_duration < 10:
|
651 |
+
wprint("The audio length must be between 3~10 seconds")
|
652 |
return False
|
653 |
return True
|
654 |
except FileNotFoundError:
|
|
|
707 |
if not duration(user_voice):
|
708 |
return None
|
709 |
if user_text == '':
|
710 |
+
wprint("Please enter text to generate")
|
711 |
return None
|
712 |
user_text=trim_text(user_text,user_lang)
|
713 |
time1=timer()
|
|
|
752 |
|
753 |
##########GRADIO###########
|
754 |
|
|
|
755 |
with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
|
756 |
gr.HTML('''
|
757 |
<h1 style="font-size: 25px;">Text-to-Speech Generator</h1>
|