Spaces:
Runtime error
Runtime error
rockdrigoma
committed on
Commit
•
b865d16
1
Parent(s):
b60f816
Update app.py
Browse files
app.py
CHANGED
@@ -3,19 +3,83 @@ import gradio as gr
|
|
3 |
import torch
|
4 |
from TTS.api import TTS
|
5 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
7 |
|
8 |
device = "cuda"
|
9 |
|
10 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_bill_spa").to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
@spaces.GPU(enable_queue=True)
|
13 |
-
def
|
14 |
tts.tts_to_file(text=text, speaker_wav=audio, language="es", file_path="./output.wav")
|
15 |
return "./output.wav"
|
16 |
|
17 |
demo = gr.Interface(
|
18 |
-
fn=
|
19 |
inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
|
20 |
outputs=gr.Audio(type='filepath')
|
21 |
)
|
|
|
3 |
import torch
|
4 |
from TTS.api import TTS
|
5 |
import os
|
6 |
+
import argparse
|
7 |
+
import os
|
8 |
+
import sys
|
9 |
+
import tempfile
|
10 |
+
import librosa.display
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
import torchaudio
|
14 |
+
import traceback
|
15 |
+
from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list
|
16 |
+
from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt
|
17 |
+
|
18 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
19 |
+
from TTS.tts.models.xtts import Xtts
|
20 |
+
|
21 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
22 |
|
23 |
device = "cuda"
|
24 |
|
25 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_bill_spa").to(device)
|
26 |
+
print(tts.model_path, " " ,tts.sconfig_path)
|
27 |
+
|
28 |
+
"""
|
29 |
+
def clear_gpu_cache():
|
30 |
+
# clear the GPU cache
|
31 |
+
if torch.cuda.is_available():
|
32 |
+
torch.cuda.empty_cache()
|
33 |
+
|
34 |
+
XTTS_MODEL = None
|
35 |
+
def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
|
36 |
+
global XTTS_MODEL
|
37 |
+
clear_gpu_cache()
|
38 |
+
if not xtts_checkpoint or not xtts_config or not xtts_vocab:
|
39 |
+
return "You need to run the previous steps or manually set the `XTTS checkpoint path`, `XTTS config path`, and `XTTS vocab path` fields !!"
|
40 |
+
config = XttsConfig()
|
41 |
+
config.load_json(xtts_config)
|
42 |
+
XTTS_MODEL = Xtts.init_from_config(config)
|
43 |
+
print("Loading XTTS model! ")
|
44 |
+
XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
|
45 |
+
if torch.cuda.is_available():
|
46 |
+
XTTS_MODEL.cuda()
|
47 |
+
|
48 |
+
print("Model Loaded!")
|
49 |
+
return "Model Loaded!"
|
50 |
+
|
51 |
+
def run_tts(lang='es', tts_text, speaker_audio_file):
|
52 |
+
if XTTS_MODEL is None or not speaker_audio_file:
|
53 |
+
return "You need to run the previous step to load the model !!", None, None
|
54 |
+
|
55 |
+
gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(audio_path=speaker_audio_file, gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, max_ref_length=XTTS_MODEL.config.max_ref_len, sound_norm_refs=XTTS_MODEL.config.sound_norm_refs)
|
56 |
+
out = XTTS_MODEL.inference(
|
57 |
+
text=tts_text,
|
58 |
+
language=lang,
|
59 |
+
gpt_cond_latent=gpt_cond_latent,
|
60 |
+
speaker_embedding=speaker_embedding,
|
61 |
+
temperature=XTTS_MODEL.config.temperature, # Add custom parameters here
|
62 |
+
length_penalty=XTTS_MODEL.config.length_penalty,
|
63 |
+
repetition_penalty=XTTS_MODEL.config.repetition_penalty,
|
64 |
+
top_k=XTTS_MODEL.config.top_k,
|
65 |
+
top_p=XTTS_MODEL.config.top_p,
|
66 |
+
)
|
67 |
+
|
68 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
69 |
+
out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
|
70 |
+
out_path = fp.name
|
71 |
+
torchaudio.save(out_path, out["wav"], 24000)
|
72 |
+
|
73 |
+
return "Speech generated !", out_path, speaker_audio_file
|
74 |
+
"""
|
75 |
|
76 |
@spaces.GPU(enable_queue=True)
|
77 |
+
def generate(text, audio):
|
78 |
tts.tts_to_file(text=text, speaker_wav=audio, language="es", file_path="./output.wav")
|
79 |
return "./output.wav"
|
80 |
|
81 |
demo = gr.Interface(
|
82 |
+
fn=generate,
|
83 |
inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
|
84 |
outputs=gr.Audio(type='filepath')
|
85 |
)
|