Spaces:

sdlc
/

Multi-Voice

Sleeping

App Files Files Community

hritiksdlccorp committed on Apr 26

Commit

71aa8ed

•

1 Parent(s): 82837f2

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -21

app.py CHANGED Viewed

@@ -187,13 +187,8 @@ def get_spepc(hps, filename):
 dict_language = {
-    ("中文1"): "all_zh",#全部按中文识别
-    ("English"): "en",#全部按英文识别#######不变
-    ("日文1"): "all_ja",#全部按日文识别
-    ("中文"): "zh",#按中英混合识别####不变
-    ("日本語"): "ja",#按日英混合识别####不变
-    ("混合"): "auto",#多语种启动切分识别语种
-}
 def splite_en_inf(sentence, language):
@@ -361,7 +356,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
     if not duration(ref_wav_path):
         return None
     if  text == '':
-        wprint("Please enter text to generate/请输入生成文字")
         return None
     t0 = ttime()
     startTime=timer()
@@ -382,8 +377,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
     if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
     text = text.strip("\n")
     if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
-    #print(("实际输入的参考文本:"), prompt_text)
-    #print(("📝实际输入的目标文本:"), text)
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
@@ -425,7 +419,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
-    print(f"🧨实际输入的目标文本(切句后):{text}\n")
     texts = text.split("\n")
     texts = merge_short_text_in_array(texts, 5)
     audio_opt = []
@@ -435,12 +429,12 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
         if (len(text.strip()) == 0):
             continue
         if (text[-1] not in splits): text += "。" if text_language != "en" else "."
-        print(("\n🎈实际输入的目标文本(每句):"), text)
         phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
         try:
             bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
         except RuntimeError as e:
-            wprint(f"The input text does not match the language/输入文本与语言不匹配: {e}")
             return None
         bert = torch.cat([bert1, bert2], 1)
@@ -481,7 +475,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
                 .numpy()[0, 0]
         )
         except RuntimeError as e:
-            wprint(f"The input text does not match the language/输入文本与语言不匹配: {e}")
             return None
         max_audio=np.abs(audio).max()
@@ -583,9 +577,7 @@ def cut5(inp):
 def custom_sort_key(s):
-    # 使用正则表达式提取字符串中的数字部分和非数字部分
     parts = re.split('(\d+)', s)
-    # 将数字部分转换为整数，非数字部分保持不变
     parts = [int(part) if part.isdigit() else part for part in parts]
     return parts
@@ -602,7 +594,7 @@ def wprint(text):
 def lang_detector(text):
     min_chars = 5
     if len(text) < min_chars:
-        return "Input text too short/输入文本太短"
     try:
         detector = Detector(text).language
         lang_info = str(detector)
@@ -651,12 +643,12 @@ def trim_text(text,language):
 def duration(audio_file_path):
     if not audio_file_path:
-        wprint("Failed to obtain uploaded audio/未找到音频文件")
         return False
     try:
         audio_duration = librosa.get_duration(filename=audio_file_path)
         if not 3 < audio_duration < 10:
-            wprint("The audio length must be between 3~10 seconds/音频时长须在3~10秒之间")
             return False
         return True
     except FileNotFoundError:
@@ -715,7 +707,7 @@ def clone_voice(user_voice,user_text,user_lang):
     if not duration(user_voice):
         return None
     if  user_text == '':
-        wprint("Please enter text to generate/请输入生成文字")
         return None
     user_text=trim_text(user_text,user_lang)
     time1=timer()
@@ -760,7 +752,6 @@ for model_name, model_info in models.items():
 ##########GRADIO###########
 with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
     gr.HTML('''
   <h1 style="font-size: 25px;">Text-to-Speech Generator</h1>

 dict_language = {
+    ("English"): "en"
+    }
 def splite_en_inf(sentence, language):
     if not duration(ref_wav_path):
         return None
     if  text == '':
+        wprint("Please enter text to generate")
         return None
     t0 = ttime()
     startTime=timer()
     if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
     text = text.strip("\n")
     if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
     zero_wav = np.zeros(
         int(hps.data.sampling_rate * 0.3),
         dtype=np.float16 if is_half == True else np.float32,
         text = cut5(text)
     while "\n\n" in text:
         text = text.replace("\n\n", "\n")
+    print(text)
     texts = text.split("\n")
     texts = merge_short_text_in_array(texts, 5)
     audio_opt = []
         if (len(text.strip()) == 0):
             continue
         if (text[-1] not in splits): text += "。" if text_language != "en" else "."
+        print(text)
         phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language)
         try:
             bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype)
         except RuntimeError as e:
+            wprint(f"The input text does not match the language: {e}")
             return None
         bert = torch.cat([bert1, bert2], 1)
                 .numpy()[0, 0]
         )
         except RuntimeError as e:
+            wprint(f"The input text does not match the language: {e}")
             return None
         max_audio=np.abs(audio).max()
 def custom_sort_key(s):
     """Return a natural-sort key for the string *s*.

     The string is split into alternating non-digit and digit runs, and the
     digit runs are converted to ints so that "a2" sorts before "a10".

     Args:
         s: The string to build a sort key for.

     Returns:
         A list of alternating str and int pieces, e.g. "file10.txt"
         becomes ["file", 10, ".txt"].
     """
     # Raw string so the regex escape is not treated as an (invalid) string
     # escape, which newer Python versions warn about.
     parts = re.split(r'(\d+)', s)
     # With a single capturing group, re.split places the matched digit runs
     # at the odd indices. Converting by position instead of str.isdigit()
     # avoids int() failing on characters such as superscript digits, which
     # isdigit() accepts but the regex and int() do not.
     return [int(part) if index % 2 else part for index, part in enumerate(parts)]
 def lang_detector(text):
     min_chars = 5
     if len(text) < min_chars:
+        return "Input text too short"
     try:
         detector = Detector(text).language
         lang_info = str(detector)
 def duration(audio_file_path):
     if not audio_file_path:
+        wprint("Failed to obtain uploaded audio")
         return False
     try:
         audio_duration = librosa.get_duration(filename=audio_file_path)
         if not 3 < audio_duration < 10:
+            wprint("The audio length must be between 3~10 seconds")
             return False
         return True
     except FileNotFoundError:
     if not duration(user_voice):
         return None
     if  user_text == '':
+        wprint("Please enter text to generate")
         return None
     user_text=trim_text(user_text,user_lang)
     time1=timer()
 ##########GRADIO###########
 with gr.Blocks(theme='Kasien/ali_theme_custom') as app:
     gr.HTML('''
   <h1 style="font-size: 25px;">Text-to-Speech Generator</h1>