rockdrigoma committed on
Commit b865d16
1 Parent(s): b60f816

Update app.py

Files changed (1)
app.py +66 -2
app.py CHANGED
@@ -3,19 +3,83 @@ import gradio as gr
 import torch
 from TTS.api import TTS
 import os
+import argparse
+import os
+import sys
+import tempfile
+import librosa.display
+import numpy as np
+
+import torchaudio
+import traceback
+from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list
+from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt
+
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+
 os.environ["COQUI_TOS_AGREED"] = "1"

 device = "cuda"

 tts = TTS("tts_models/multilingual/multi-dataset/xtts_bill_spa").to(device)
+print(tts.model_path, " ", tts.config_path)
+
+"""
+def clear_gpu_cache():
+    # clear the GPU cache
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+XTTS_MODEL = None
+def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
+    global XTTS_MODEL
+    clear_gpu_cache()
+    if not xtts_checkpoint or not xtts_config or not xtts_vocab:
+        return "You need to run the previous steps or manually set the `XTTS checkpoint path`, `XTTS config path`, and `XTTS vocab path` fields !!"
+    config = XttsConfig()
+    config.load_json(xtts_config)
+    XTTS_MODEL = Xtts.init_from_config(config)
+    print("Loading XTTS model! ")
+    XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False)
+    if torch.cuda.is_available():
+        XTTS_MODEL.cuda()
+
+    print("Model Loaded!")
+    return "Model Loaded!"
+
+def run_tts(tts_text, speaker_audio_file, lang="es"):
+    if XTTS_MODEL is None or not speaker_audio_file:
+        return "You need to run the previous step to load the model !!", None, None
+
+    gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(audio_path=speaker_audio_file, gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, max_ref_length=XTTS_MODEL.config.max_ref_len, sound_norm_refs=XTTS_MODEL.config.sound_norm_refs)
+    out = XTTS_MODEL.inference(
+        text=tts_text,
+        language=lang,
+        gpt_cond_latent=gpt_cond_latent,
+        speaker_embedding=speaker_embedding,
+        temperature=XTTS_MODEL.config.temperature,  # Add custom parameters here
+        length_penalty=XTTS_MODEL.config.length_penalty,
+        repetition_penalty=XTTS_MODEL.config.repetition_penalty,
+        top_k=XTTS_MODEL.config.top_k,
+        top_p=XTTS_MODEL.config.top_p,
+    )
+
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
+        out_path = fp.name
+        torchaudio.save(out_path, out["wav"], 24000)
+
+    return "Speech generated !", out_path, speaker_audio_file
+"""

 @spaces.GPU(enable_queue=True)
-def clone(text, audio):
+def generate(text, audio):
     tts.tts_to_file(text=text, speaker_wav=audio, language="es", file_path="./output.wav")
     return "./output.wav"

 demo = gr.Interface(
-    fn=generate,
-    fn=clone,
+    fn=generate,
     inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
     outputs=gr.Audio(type='filepath')
 )
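For context, the newly added (but commented-out) helpers mirror Coqui's XTTS fine-tuning demo. If they were re-enabled, they would be driven roughly as in the sketch below; the checkpoint, config, vocab, and reference-audio paths are placeholders, not files that ship with this Space.

# Sketch only: assumes the commented-out load_model/run_tts helpers above are
# re-enabled and that a fine-tuned XTTS run exists at the placeholder paths.
status = load_model(
    xtts_checkpoint="run/training/best_model.pth",  # placeholder path
    xtts_config="run/training/config.json",         # placeholder path
    xtts_vocab="run/training/vocab.json",           # placeholder path
)
print(status)  # "Model Loaded!" on success

msg, wav_path, ref_path = run_tts(
    tts_text="Hola, esto es una prueba de clonación de voz.",
    speaker_audio_file="reference.wav",             # placeholder reference clip
    lang="es",
)
print(msg, wav_path)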
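The live path through the app is unchanged apart from the rename from clone to generate: gr.Interface still takes a text prompt and a reference voice clip and returns the synthesized wav. A hedged example of calling the deployed endpoint with gradio_client follows; the Space id is a placeholder and a recent gradio_client version is assumed.

from gradio_client import Client, handle_file

# Placeholder Space id; substitute the actual owner/space-name.
client = Client("owner/space-name")
result = client.predict(
    "Hola, ¿cómo estás?",          # Frase a generar
    handle_file("reference.wav"),  # Voz de referencia (local wav file)
    api_name="/predict",           # default endpoint name for an unnamed gr.Interface fn
)
print(result)  # local path to the generated output wav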