Ahsen Khaliq committed
Commit c6cf580
1 Parent(s): 67915ff

Create app.py

Files changed (1)
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
+ import gradio as gr
+ import time
+ import torch
+ import scipy.io.wavfile
+ from espnet2.bin.tts_inference import Text2Speech
+ from espnet2.utils.types import str_or_none
+
+ lang = 'English'
+ tag = 'kan-bayashi/ljspeech_vits'
+ vocoder_tag = "none"
+
+ # Download and load the pretrained LJSpeech VITS model from the ESPnet model zoo.
+ text2speech = Text2Speech.from_pretrained(
+     model_tag=str_or_none(tag),
+     vocoder_tag=str_or_none(vocoder_tag),
+     device="cpu",
+     # Only for Tacotron 2 & Transformer
+     threshold=0.5,
+     # Only for Tacotron 2
+     minlenratio=0.0,
+     maxlenratio=10.0,
+     use_att_constraint=False,
+     backward_window=1,
+     forward_window=3,
+     # Only for FastSpeech & FastSpeech2 & VITS
+     speed_control_alpha=1.0,
+     # Only for VITS
+     noise_scale=0.333,
+     noise_scale_dur=0.333,
+ )
+
+
+ def inference(text):
+     with torch.no_grad():
+         start = time.time()
+         wav = text2speech(text)["wav"]
+     # Write the synthesized waveform to disk at the model's sampling rate.
+     scipy.io.wavfile.write("out.wav", text2speech.fs, wav.view(-1).cpu().numpy())
+     return "out.wav"
+
+
+ title = "TTS"
+ description = "Gradio demo for ESPnet2 text-to-speech with the kan-bayashi/ljspeech_vits model. To use it, simply type your text, or click one of the examples to load it. Read more at the link below."
+ article = "<p style='text-align: center'><a href='https://github.com/espnet/espnet'>ESPnet Github Repo</a></p>"
+
+ gr.Interface(
+     inference,
+     "text",
+     gr.outputs.Audio(type="file", label="Output"),
+     title=title,
+     description=description,
+     article=article,
+     enable_queue=True
+ ).launch(debug=True)
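
A minimal sketch of exercising the same synthesis pipeline without the Gradio UI, e.g. as a local sanity check. It assumes the espnet_model_zoo package is installed so that Text2Speech.from_pretrained can resolve the "kan-bayashi/ljspeech_vits" tag; the example text and output filename are illustrative.

    import scipy.io.wavfile
    import torch
    from espnet2.bin.tts_inference import Text2Speech

    # Same model tag as in app.py; downloads the model on first use.
    tts = Text2Speech.from_pretrained(model_tag="kan-bayashi/ljspeech_vits", device="cpu")

    with torch.no_grad():
        out = tts("Hello, this is a quick test of the text to speech demo.")

    # Save the waveform at the model's sampling rate (tts.fs).
    scipy.io.wavfile.write("check.wav", tts.fs, out["wav"].view(-1).cpu().numpy())
    print("wrote check.wav at", tts.fs, "Hz")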