Spaces:

thinhlpg
/

vixtts-demo

Running on Zero

App Files Files Community

thinhlpg committed on May 6

Commit

376b5d9

•

1 Parent(s): c837795

chores: more clean up

Browse files

Files changed (1) hide show

app.py +20 -67

app.py CHANGED Viewed

@@ -11,31 +11,28 @@ os.system("python -m unidic download")
 import csv
 import datetime
 import re
 from io import StringIO
 import gradio as gr
-# langid is used to detect language for longer text
-# Most users expect text to be their own language, there is checkbox to disable it
-import langid
-from huggingface_hub import hf_hub_download, snapshot_download
-from TTS.api import TTS
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
-from underthesea import sent_tokenize
-from unidecode import unidecode
 from vinorm import TTSnorm
-HF_TOKEN = os.environ.get("HF_TOKEN")
-from huggingface_hub import HfApi
-# will use api to restart space on a unrecoverable error
 api = HfApi(token=HF_TOKEN)
 # This will trigger downloading model
-print("Downloading if not downloaded Coqui XTTS V2")
 checkpoint_dir = "model/"
 repo_id = "capleaf/viXTTS"
 use_deepspeed = False
@@ -154,13 +151,7 @@ def predict(
             )
             gr.Warning("Unhandled Exception encounter, please retry in a minute")
             print("Cuda device-assert Runtime encountered need restart")
-            if not DEVICE_ASSERT_DETECTED:
-                DEVICE_ASSERT_DETECTED = 1
-                DEVICE_ASSERT_PROMPT = prompt
-                DEVICE_ASSERT_LANG = language
-            # just before restarting save what caused the issue so we can handle it in future
-            # Uploading Error data only happens for unrecovarable error
             error_time = datetime.datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
             error_data = [
                 error_time,
@@ -212,59 +203,28 @@ def predict(
                 )
             else:
                 print("RuntimeError: non device-side assert error:", str(e))
-                gr.Warning("Something unexpected happened please retry again.")
-            return (
-                None,
-                None,
-                None,
-                None,
-            )
-    return (
-        gr.make_waveform(
-            audio="output.wav",
-        ),
-        "output.wav",
-        metrics_text,
-        speaker_wav,
-    )
 title = "viXTTS Demo"
-description = """
-<br/>
-This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
-<br/>
-Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh-cn, Czech: cs, Dutch: nl, English: en, French: fr, German: de, Italian: it, Polish: pl, Russian: ru, Spanish: es, Turkish: tr, Japanese: ja, Korean: ko, Hungarian: hu, Hindi: hi
-<br/>
-"""
-article = """
-"""
 with gr.Blocks(analytics_enabled=False) as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown(
                 """
-                😳 Burh
                 """
             )
         with gr.Column():
             # placeholder to align the image
             pass
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown(description)
     with gr.Row():
         with gr.Column():
             input_text_gr = gr.Textbox(
@@ -304,19 +264,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
                 type="filepath",
                 value="model/samples/nu-luu-loat.wav",
             )
-            mic_gr = gr.Audio(
-                source="microphone",
-                type="filepath",
-                info="Use your microphone to record audio",
-                label="Use Microphone for Reference",
-            )
             tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
         with gr.Column():
-            video_gr = gr.Video(label="Waveform Visual")
             audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
             out_text_gr = gr.Text(label="Metrics")
-            ref_audio_gr = gr.Audio(label="Reference Audio Used")
     tts_button.click(
         predict,
@@ -324,10 +276,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
             input_text_gr,
             language_gr,
             ref_gr,
-            mic_gr,
         ],
-        outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
     )
 demo.queue()
-demo.launch(debug=True, show_api=True)

 import csv
 import datetime
+import os
 import re
+import time
+import uuid
 from io import StringIO
 import gradio as gr
+import torch
+import torchaudio
+from huggingface_hub import HfApi, hf_hub_download, snapshot_download
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
 from vinorm import TTSnorm
+# download for mecab
+# os.system("python -m unidic download")
+HF_TOKEN = os.environ.get("HF_TOKEN")
 api = HfApi(token=HF_TOKEN)
 # This will trigger downloading model
+print("Downloading if not downloaded viXTTS")
 checkpoint_dir = "model/"
 repo_id = "capleaf/viXTTS"
 use_deepspeed = False
             )
             gr.Warning("Unhandled Exception encounter, please retry in a minute")
             print("Cuda device-assert Runtime encountered need restart")
             error_time = datetime.datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
             error_data = [
                 error_time,
                 )
             else:
                 print("RuntimeError: non device-side assert error:", str(e))
+                metrics_text = gr.Warning(
+                    "Something unexpected happened please retry again."
+                )
+            return (None, metrics_text)
+    return ("output.wav", metrics_text)
 title = "viXTTS Demo"
 with gr.Blocks(analytics_enabled=False) as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown(
                 """
+                viXTTS Demo
                 """
             )
         with gr.Column():
             # placeholder to align the image
             pass
     with gr.Row():
         with gr.Column():
             input_text_gr = gr.Textbox(
                 type="filepath",
                 value="model/samples/nu-luu-loat.wav",
             )
             tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
         with gr.Column():
             audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
             out_text_gr = gr.Text(label="Metrics")
     tts_button.click(
         predict,
             input_text_gr,
             language_gr,
             ref_gr,
+            normalize_text,
         ],
+        outputs=[audio_gr, out_text_gr],
+        api_name="predict",
     )
 demo.queue()
+demo.launch(debug=True, show_api=True, share=True)