ayymen committed on
Commit
15e7b85
1 Parent(s): e707d6a

Enable voice cloning

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import tempfile
3
- from TTS.utils.synthesizer import Synthesizer
4
  from huggingface_hub import hf_hub_download
5
  import torch
6
 
@@ -20,41 +20,42 @@ my_examples = [
20
 
21
  my_inputs = [
22
  gr.Textbox(lines=5, label="Input Text"),
 
23
  gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True)
24
  ]
25
 
26
- my_outputs = gr.Audio(type="filepath", label="Output Audio")
27
 
28
- def tts(text: str, split_sentences: bool = True):
29
- best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
30
- config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
31
-
32
- # init synthesizer
33
- synthesizer = Synthesizer(
34
- best_model_path,
35
- config_path,
36
- use_cuda=CUDA
37
- )
38
 
 
 
 
 
 
 
39
  # replace oov characters
40
  text = text.replace("\n", ". ")
41
  text = text.replace("(", ",")
42
  text = text.replace(")", ",")
43
  text = text.replace(";", ",")
44
 
45
- # create audio file
46
- wavs = synthesizer.tts(text, split_sentences=split_sentences)
47
  with tempfile.NamedTemporaryFile(suffix = ".wav", delete = False) as fp:
48
- synthesizer.save_wav(wavs, fp)
 
 
 
 
49
  return fp.name
50
-
51
  iface = gr.Interface(
52
  fn=tts,
53
  inputs=my_inputs,
54
  outputs=my_outputs,
55
  title=my_title,
56
- description = my_description,
57
- examples = my_examples,
58
  cache_examples=True
59
  )
60
  iface.launch()
 
1
  import gradio as gr
2
  import tempfile
3
+ from TTS.api import TTS
4
  from huggingface_hub import hf_hub_download
5
  import torch
6
 
 
20
 
21
  my_inputs = [
22
  gr.Textbox(lines=5, label="Input Text"),
23
+ gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
24
  gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True)
25
  ]
26
 
27
+ my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
28
 
29
+ best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
30
+ config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
 
 
 
 
 
 
 
 
31
 
32
+ api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
33
+
34
+ # load voice conversion model
35
+ api.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24", gpu=CUDA)
36
+
37
def tts(text: str, speaker_wav: str = None, split_sentences: bool = True):
    """Synthesize ``text`` into a WAV file and return the file's path.

    Args:
        text: Text to speak. Newlines, parentheses and semicolons are
            rewritten to plain punctuation before synthesis.
        speaker_wav: Optional path to a reference recording. When given, the
            audio is produced with ``api.tts_with_vc_to_file`` (voice
            cloning); otherwise ``api.tts_to_file`` is used.
        split_sentences: Forwarded unchanged to the synthesis call.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it persists after this function returns.

    Raises:
        Whatever the synthesis call raises; the temporary file is removed
        before the exception propagates.
    """
    import os

    # replace oov characters (single pass; none of the replacement strings
    # contain a character that is itself replaced, so this matches the
    # sequential str.replace calls it supersedes)
    text = text.translate(
        str.maketrans({"\n": ". ", "(": ",", ")": ",", ";": ","})
    )

    # Reserve a unique path and close the handle *before* synthesis: opening a
    # NamedTemporaryFile a second time by name while it is still open is not
    # portable (it fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name

    try:
        if speaker_wav:
            api.tts_with_vc_to_file(
                text,
                speaker_wav=speaker_wav,
                file_path=wav_path,
                split_sentences=split_sentences,
            )
        else:
            api.tts_to_file(
                text,
                file_path=wav_path,
                split_sentences=split_sentences,
            )
    except Exception:
        # delete=False means nobody else cleans this up; do not leave an
        # orphaned file behind when synthesis fails.
        try:
            os.remove(wav_path)
        except OSError:
            pass
        raise

    return wav_path
51
+
52
  iface = gr.Interface(
53
  fn=tts,
54
  inputs=my_inputs,
55
  outputs=my_outputs,
56
  title=my_title,
57
+ description=my_description,
58
+ examples=my_examples,
59
  cache_examples=True
60
  )
61
  iface.launch()