Spaces:

pytorch
/

Tacotron2

Running

akhaliq HF Staff committed on Mar 1, 2022

Commit

4e45c79

1 Parent(s): bcb66b7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import torch
 import torchaudio
 import gradio as gr
 device="cpu"
 bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
@@ -36,15 +37,17 @@ def inference(text):
       spec, _, _ = tacotron2.infer(processed, lengths)
   plt.imshow(spec[0].cpu().detach())
   with torch.no_grad():
       waveforms = waveglow.infer(spec)
   torchaudio.save("output_waveglow.wav", waveforms[0:1].cpu(), sample_rate=22050)
-  return "output_waveglow.wav",plt
 title="TACOTRON 2"
 description="Gradio demo for TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
 examples=[["life is like a box of chocolates"]]
-gr.Interface(inference,"text",[gr.outputs.Audio(type="file"),gr.outputs.Image(type="plot",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)

 import torch
 import torchaudio
 import gradio as gr
+import matplotlib as plt
 device="cpu"
 bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
       spec, _, _ = tacotron2.infer(processed, lengths)
   plt.imshow(spec[0].cpu().detach())
+  plt.axis('off')
+  plt.savefig("test.png", bbox_inches='tight')
   with torch.no_grad():
       waveforms = waveglow.infer(spec)
   torchaudio.save("output_waveglow.wav", waveforms[0:1].cpu(), sample_rate=22050)
+  return "output_waveglow.wav","test.png"
 title="TACOTRON 2"
 description="Gradio demo for TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
 examples=[["life is like a box of chocolates"]]
+gr.Interface(inference,"text",[gr.outputs.Audio(type="file"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)