File size: 3,300 Bytes
3b1f0f3
 
 
 
 
5bd7020
 
 
 
 
 
 
 
4306d26
3b1f0f3
8a1ba2b
4306d26
8a1ba2b
4306d26
8a1ba2b
8aa19f9
3b1f0f3
 
 
 
8aa19f9
3b1f0f3
 
 
 
 
7779a3f
3b1f0f3
518f6c1
 
 
 
 
3b1f0f3
 
 
 
 
 
 
 
 
 
4306d26
3b1f0f3
 
 
8a1ba2b
3b1f0f3
 
 
 
4306d26
3b1f0f3
5bd7020
3b1f0f3
 
 
 
 
 
5bd7020
3b1f0f3
 
 
 
 
 
 
5bd7020
3b1f0f3
 
 
 
 
 
 
 
 
 
 
 
 
 
4306d26
3b1f0f3
 
 
 
 
 
 
 
 
4306d26
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import random 
import gradio as gr 
import numpy as np 
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError

def pad_buffer(audio):
    """Return ``audio`` zero-padded so its length is a whole number of int16 samples.

    A bytes buffer whose length is already a multiple of the int16 item
    size is returned unchanged; otherwise NUL bytes are appended to reach
    the next multiple.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        # Already aligned to the sample width: nothing to append.
        return audio
    missing = sample_width - leftover
    return audio + b'\0' * missing

def generate_voice(text, voice_name, model_name):
    """Synthesize speech for ``text`` and return it as a Gradio numpy audio tuple.

    Args:
        text: Text to speak. Only the first 250 characters are sent.
        voice_name: Forwarded to ``generate`` as ``voice``.
        model_name: Forwarded to ``generate`` as ``model``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 44100 and
        ``samples`` is an int16 numpy array viewing the padded bytes returned
        by ``generate``.

    Raises:
        gr.Error: When the unauthenticated rate limit is hit (with a fixed,
            user-friendly message), or when any other exception occurs (with
            that exception's message). The original exception is chained as
            the cause.
    """
    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model=model_name,
        )
        # NOTE(review): this reinterprets the returned bytes as 16-bit samples
        # at 44.1 kHz; confirm that matches the output format of generate().
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error takes a message string; pass the text of the failure
        # rather than the exception object, and keep the cause for debugging.
        raise gr.Error(str(e)) from e
    

# Markup for a row of GitHub / Twitter / Discord badge links.
# It is passed to gr.Markdown when the UI is built below.
badges = """
<div style="display: flex">
<span style="margin-right: 5px"> 

[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/elevenlabs/elevenlabs-python)
 
</span>
<span style="margin-right: 5px"> 

[ ![Twitter](https://img.shields.io/badge/Twitter-%231DA1F2.svg?style=for-the-badge&logo=Twitter&logoColor=white) ](https://twitter.com/elevenlabsio)
 
</span>
<span>

[ ![](https://dcbadge.vercel.app/api/server/elevenlabs) ](https://discord.gg/elevenlabs)

</span>
</div>
"""

# Introductory Markdown blurb shown under the badges; it describes the two
# models offered in the "Model" radio selector.
description = """
A demo of the world's most advanced TTS systems, made by [ElevenLabs](https://elevenlabs.io). Eleven Monolingual is designed to generate highly realistic voices in English, where Eleven Multilingual is a single model supporting multiple languages including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi. Sign up on [ElevenLabs](https://elevenlabs.io) to get fast access, long-form generation, voice cloning, API keys, and more!
"""

# Build the demo UI: header/branding, text + voice + model inputs, a button
# that triggers generate_voice, and an audio player for the result.
with gr.Blocks() as block:
    gr.Markdown('[ ![ElevenLabs](https://raw.githubusercontent.com/elevenlabs/elevenlabs-python/main/LOGO.png) ](https://elevenlabs.io)')
    gr.Markdown(badges)
    gr.Markdown(description)

    input_text = gr.Textbox(
        label="Input Text (250 characters max)",
        lines=2,
        value="Hahaha OHH MY GOD! This is SOOO funny, I-I am Eleven and-and I am a text to speech system!!",
        elem_id="input_text"
    )

    # The voice list is fetched once, while the UI is being constructed.
    all_voices = voices()
    voice_names = [voice.name for voice in all_voices]
    # Prefer "Arnold" as the preselected voice, but do not preselect a value
    # that is missing from the fetched choices.
    if "Arnold" in voice_names:
        default_voice = "Arnold"
    else:
        default_voice = voice_names[0] if voice_names else None
    input_voice = gr.Dropdown(
        voice_names,
        value=default_voice,
        label="Voice",
        elem_id="input_voice"
    )

    input_model = gr.Radio(
        ["eleven_monolingual_v1", "eleven_multilingual_v1"],
        label="Model",
        value="eleven_monolingual_v1",
        elem_id="input_model",
    )

    # Button's label parameter is `value`; the previous `text=` / `type=`
    # keywords are not Button parameters, so the label was never applied.
    run_button = gr.Button(
        value="Generate Voice"
    )

    out_audio = gr.Audio(
        label="Generated Voice",
        type="numpy",
        elem_id="out_audio"
    )

    inputs = [input_text, input_voice, input_model]
    outputs = [out_audio]

    # Order of `inputs` matches generate_voice(text, voice_name, model_name).
    run_button.click(
        fn=generate_voice,
        inputs=inputs,
        outputs=outputs,
        queue=True
    )

# Process requests through the queue one at a time, then start the server.
block.queue(concurrency_count=1).launch(debug=True)