File size: 3,300 Bytes
3b1f0f3 5bd7020 4306d26 3b1f0f3 8a1ba2b 4306d26 8a1ba2b 4306d26 8a1ba2b 8aa19f9 3b1f0f3 8aa19f9 3b1f0f3 7779a3f 3b1f0f3 518f6c1 3b1f0f3 4306d26 3b1f0f3 8a1ba2b 3b1f0f3 4306d26 3b1f0f3 5bd7020 3b1f0f3 5bd7020 3b1f0f3 5bd7020 3b1f0f3 4306d26 3b1f0f3 4306d26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import random
import gradio as gr
import numpy as np
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
def pad_buffer(audio, dtype=np.int16):
    """Pad a byte buffer with zero bytes to a whole number of elements.

    ``np.frombuffer`` rejects a buffer whose length is not a multiple of the
    element size, so trailing NUL bytes are appended until it is.

    Args:
        audio: Bytes-like buffer supporting ``len()`` and ``+ bytes``.
        dtype: NumPy dtype whose item size the buffer length must be a
            multiple of. Defaults to ``np.int16`` (2 bytes).

    Returns:
        ``audio`` itself when it is already aligned, otherwise a new buffer
        with the minimal number of ``b'\\0'`` bytes appended.
    """
    element_size = np.dtype(dtype).itemsize
    # -len % size is the distance to the next multiple (0 when aligned).
    missing = -len(audio) % element_size
    if missing:
        audio = audio + b'\0' * missing
    return audio
def generate_voice(text, voice_name, model_name):
    """Synthesize speech for ``text`` and return it for a Gradio audio output.

    Only the first 250 characters of ``text`` are sent to ``generate``.

    Args:
        text: Text to speak.
        voice_name: Passed to ``generate`` as ``voice``.
        model_name: Passed to ``generate`` as ``model``.

    Returns:
        A ``(44100, samples)`` tuple, where ``samples`` is the returned audio
        buffer (zero-padded to an even length) viewed as an ``int16`` array.

    Raises:
        gr.Error: If the unauthenticated rate limit is hit, or if any other
            exception occurs; the original exception is chained as the cause.
    """
    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model=model_name,
        )
        # NOTE(review): this reads the returned bytes as raw 16-bit PCM at
        # 44.1 kHz -- confirm that matches the output format of generate().
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string, not an exception object.
        raise gr.Error(str(e)) from e
# Markdown/HTML fragment: a flex row of three badge images, each linking to
# an ElevenLabs page (GitHub repository, Twitter account, Discord server).
# Rendered near the top of the page via gr.Markdown(badges).
badges = """
<div style="display: flex">
<span style="margin-right: 5px">
[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/elevenlabs/elevenlabs-python)
</span>
<span style="margin-right: 5px">
[ ![Twitter](https://img.shields.io/badge/Twitter-%231DA1F2.svg?style=for-the-badge&logo=Twitter&logoColor=white) ](https://twitter.com/elevenlabsio)
</span>
<span>
[ ![](https://dcbadge.vercel.app/api/server/elevenlabs) ](https://discord.gg/elevenlabs)
</span>
</div>
"""
# Introductory Markdown shown to the user via gr.Markdown(description); it
# describes the two selectable models and links to the ElevenLabs site.
description = """
A demo of the world's most advanced TTS systems, made by [ElevenLabs](https://elevenlabs.io). Eleven Monolingual is designed to generate highly realistic voices in English, where Eleven Multilingual is a single model supporting multiple languages including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi. Sign up on [ElevenLabs](https://elevenlabs.io) to get fast access, long-form generation, voice cloning, API keys, and more!
"""
# Build the demo page: header Markdown, the three inputs (text, voice, model),
# a button that triggers generation, and the audio output.
with gr.Blocks() as block:
    gr.Markdown('[ ![ElevenLabs](https://raw.githubusercontent.com/elevenlabs/elevenlabs-python/main/LOGO.png) ](https://elevenlabs.io)')
    gr.Markdown(badges)
    gr.Markdown(description)

    input_text = gr.Textbox(
        label="Input Text (250 characters max)",
        lines=2,
        value="Hahaha OHH MY GOD! This is SOOO funny, I-I am Eleven and-and I am a text to speech system!!",
        elem_id="input_text"
    )

    # The voice list is fetched once, while the UI is being constructed.
    all_voices = voices()
    input_voice = gr.Dropdown(
        [voice.name for voice in all_voices],
        value="Arnold",
        label="Voice",
        elem_id="input_voice"
    )

    input_model = gr.Radio(
        ["eleven_monolingual_v1", "eleven_multilingual_v1"],
        label="Model",
        value="eleven_monolingual_v1",
        elem_id="input_model",
    )

    # gr.Button takes its caption through `value`; the previous `text=` and
    # `type=` keywords are not Button parameters, so the caption was not set.
    run_button = gr.Button(value="Generate Voice")

    out_audio = gr.Audio(
        label="Generated Voice",
        type="numpy",
        elem_id="out_audio"
    )

    inputs = [input_text, input_voice, input_model]
    outputs = [out_audio]

    # Wire the button to generate_voice; requests go through the queue.
    run_button.click(
        fn=generate_voice,
        inputs=inputs,
        outputs=outputs,
        queue=True
    )

# Process one queued request at a time and start the server.
block.queue(concurrency_count=1).launch(debug=True)