Spaces:

Kevin676
/

ChatGPT-with-Voice-Cloning-for-All

Runtime error

App Files Files Community

Kevin676 committed on Apr 16, 2023

Commit

22c3d58

1 Parent(s): a3da95c

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -14

app.py CHANGED Viewed

@@ -45,10 +45,13 @@ def chatgpt(apikey, result):
     return chat_response
-def english(text_en, upload):
-    tts1.tts_to_file(text_en, speaker_wav = upload, language="en", file_path="output.wav")
     noisy = enhance_model.load_audio(
     "output.wav"
     ).unsqueeze(0)
@@ -58,14 +61,29 @@ def english(text_en, upload):
     return "enhanced.wav"
-def chinese(text_cn, upload1):
-    tts2.tts_with_vc_to_file(
-        text_cn + "。",
-        speaker_wav=upload1,
-        file_path="ouptut1.wav"
-    )
-    return "ouptut1.wav"
 block = gr.Blocks()
@@ -95,13 +113,13 @@ with block:
             with gr.Row().style(mobile_collapse=False, equal_height=True):
                 inp3 = texts1
                 inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
-#                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音，与文件上传二选一即可')
                 btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
         out1 = gr.Audio(label="合成的专属声音(中文)")
-        btn1.click(chinese, [inp3, inp4], [out1])
         with gr.Box():
             with gr.Row().style(mobile_collapse=False, equal_height=True):
@@ -110,7 +128,7 @@ with block:
         out2 = gr.Audio(label="合成的专属声音(英文)")
-        btn2.click(english, [inp3, inp4], [out2])
         gr.Markdown(
             """ ### <center>注意❗：请不要输入或生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。</center>
@@ -124,7 +142,7 @@ with block:
         <div class="footer">
                     <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
                     </p>
-                    <p>注：中文声音克隆实际上是由声音转换(Voice Conversion)实现，所以输出结果可能更像是一种新的声音，效果不一定很理想，希望大家理解！
                     </p>
         </div>
         ''')

     return chat_response
+def english(text_en, upload, VoiceMicrophone):
+    if upload is not None:
+        tts1.tts_to_file(text_en, speaker_wav = upload, language="en", file_path="output.wav")
+    else:
+        tts1.tts_to_file(text_en, speaker_wav = VoiceMicrophone, language="en", file_path="output.wav")
     noisy = enhance_model.load_audio(
     "output.wav"
     ).unsqueeze(0)
     return "enhanced.wav"
+def chinese(text_cn, upload1, VoiceMicrophone):
+    """Synthesize Chinese speech in a reference voice and return the enhanced file.
+
+    Args:
+        text_cn: Text to speak; a Chinese full stop ("。") is appended before
+            synthesis.
+        upload1: Path of an uploaded reference recording, or None when the
+            user did not upload a file.
+        VoiceMicrophone: Path of a microphone reference recording, used only
+            when ``upload1`` is None.
+
+    Returns:
+        The path "enhanced1.wav", which holds the enhanced audio.
+    """
+    # The uploaded file takes priority; otherwise fall back to the microphone
+    # recording. (Previously the fallback branch passed ``upload1`` again, so
+    # the microphone input was never used.)
+    speaker_wav = upload1 if upload1 is not None else VoiceMicrophone
+    # NOTE(review): if both inputs are None the TTS call receives None as the
+    # reference voice -- confirm whether the UI should reject that case.
+    tts2.tts_with_vc_to_file(
+        text_cn + "。",
+        speaker_wav=speaker_wav,
+        # Must match the path loaded below; the earlier "ouptut1.wav" typo
+        # made the enhancement step read a file that was never written.
+        file_path="output1.wav"
+    )
+    # Load the synthesized audio and add a leading batch dimension.
+    noisy = enhance_model.load_audio(
+        "output1.wav"
+    ).unsqueeze(0)
+    # lengths=[1.] presumably marks the single item as full-length (relative
+    # length convention) -- TODO confirm against the enhancement model's API.
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    # Save with a 16000 Hz sample rate.
+    torchaudio.save("enhanced1.wav", enhanced.cpu(), 16000)
+    return "enhanced1.wav"
 block = gr.Blocks()
             with gr.Row().style(mobile_collapse=False, equal_height=True):
                 inp3 = texts1
                 inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
+                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音，与文件上传二选一即可')
                 btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
         out1 = gr.Audio(label="合成的专属声音(中文)")
+        btn1.click(chinese, [inp3, inp4, inp5], [out1])
         with gr.Box():
             with gr.Row().style(mobile_collapse=False, equal_height=True):
         out2 = gr.Audio(label="合成的专属声音(英文)")
+        btn2.click(english, [inp3, inp4, inp5], [out2])
         gr.Markdown(
             """ ### <center>注意❗：请不要输入或生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。</center>
         <div class="footer">
                     <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
                     </p>
+                    <p>注：中文声音克隆实际上是由声音转换(Voice Conversion)实现，所以输出结果可能更像是一种新的声音，效果不一定很理想，希望大家理解(之后也会不断迭代的)！为了更好的效果，使用中文声音克隆时请尽量上传女声。
                     </p>
         </div>
         ''')