import gradio as gr

title = "fairseq S^2: A Scalable and Integrable Speech Synthesis Toolkit"

description = "Gradio Demo for fairseq S^2: A Scalable and Integrable Speech Synthesis Toolkit. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."

article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2109.06912' target='_blank'>fairseq S^2: A Scalable and Integrable Speech Synthesis Toolkit</a> | <a href='https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis' target='_blank'>Github Repo</a></p>"

examples = [
    ["Hello, this is a test run.", "fastspeech2-en-200_speaker-cv4"],
    ["Hello, this is a test run.", "tts_transformer-en-200_speaker-cv4"],
    ["Bonjour, ceci est un test.", "tts_transformer-fr-cv7_css10"],
    ["Здравствуйте, это пробный запуск.", "tts_transformer-ru-cv7_css10"],
    ["Merhaba, bu bir deneme çalışmasıdır.", "tts_transformer-tr-cv7"],
    ["Xin chào, đây là một cuộc chạy thử nghiệm.", "tts_transformer-vi-cv7"],
    ["مرحبًا، هذا اختبار تشغيل.", "tts_transformer-ar-cv7"],
    ["Hola, esta es una prueba.", "tts_transformer-es-css10"]
]

# Load a hosted inference interface for each fairseq S^2 model from the Hugging Face Hub.
io1 = gr.Interface.load("huggingface/facebook/fastspeech2-en-200_speaker-cv4")

io2 = gr.Interface.load("huggingface/facebook/tts_transformer-en-200_speaker-cv4")

io3 = gr.Interface.load("huggingface/facebook/tts_transformer-fr-cv7_css10")

io4 = gr.Interface.load("huggingface/facebook/tts_transformer-ru-cv7_css10")

io5 = gr.Interface.load("huggingface/facebook/tts_transformer-tr-cv7")

io6 = gr.Interface.load("huggingface/facebook/tts_transformer-vi-cv7")

io7 = gr.Interface.load("huggingface/facebook/tts_transformer-ar-cv7")

io8 = gr.Interface.load("huggingface/facebook/tts_transformer-es-css10")


    
def inference(text, model):
    # Route the input text to the interface that matches the selected model.
    if model == "fastspeech2-en-200_speaker-cv4":
        output = io1(text)
    elif model == "tts_transformer-en-200_speaker-cv4":
        output = io2(text)
    elif model == "tts_transformer-fr-cv7_css10":
        output = io3(text)
    elif model == "tts_transformer-ru-cv7_css10":
        output = io4(text)
    elif model == "tts_transformer-tr-cv7":
        output = io5(text)
    elif model == "tts_transformer-vi-cv7":
        output = io6(text)
    elif model == "tts_transformer-ar-cv7":
        output = io7(text)
    else:
        output = io8(text)
    return output
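
# The chain above mirrors the dropdown choices one-to-one. A more compact,
# equivalent dispatch (shown here only as a sketch; it is not wired into the
# demo below) maps each model name to its loaded interface:
MODEL_INTERFACES = {
    "fastspeech2-en-200_speaker-cv4": io1,
    "tts_transformer-en-200_speaker-cv4": io2,
    "tts_transformer-fr-cv7_css10": io3,
    "tts_transformer-ru-cv7_css10": io4,
    "tts_transformer-tr-cv7": io5,
    "tts_transformer-vi-cv7": io6,
    "tts_transformer-ar-cv7": io7,
    "tts_transformer-es-css10": io8,
}

def inference_via_lookup(text, model):
    # Fall back to the Spanish model, mirroring the final else branch above.
    return MODEL_INTERFACES.get(model, io8)(text)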


gr.Interface(
    inference,
    [
        gr.inputs.Textbox(label="Input", lines=5),
        gr.inputs.Dropdown(
            choices=["fastspeech2-en-200_speaker-cv4", "tts_transformer-en-200_speaker-cv4",
                     "tts_transformer-tr-cv7", "tts_transformer-fr-cv7_css10",
                     "tts_transformer-ru-cv7_css10", "tts_transformer-vi-cv7",
                     "tts_transformer-ar-cv7", "tts_transformer-es-css10"],
            type="value", default="fastspeech2-en-200_speaker-cv4", label="Model"),
    ],
    gr.outputs.Audio(label="Output"),
    examples=examples,
    article=article,
    title=title,
    description=description,
).launch(enable_queue=True)
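
# Note: this script targets the older Gradio API (gr.inputs / gr.outputs,
# gr.Interface.load, and launch(enable_queue=True)). On current Gradio
# releases the rough equivalents would be gr.Textbox / gr.Dropdown / gr.Audio,
# gr.load("huggingface/facebook/..."), and .queue().launch(); that migration
# is not done here.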