|
<!-- The doctype keeps browsers in standards mode; without it the page is laid out in quirks mode. -->
<!doctype html>
<html lang="en">
|
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Text-to-speech demo (Gradio-Lite + Transformers.js)</title>

  <!--
    Gradio-Lite runtime and its stylesheet, loaded from the jsDelivr CDN.
    NOTE(review): the URLs carry no version, so the CDN serves whatever is
    current; consider pinning a version (which would also make SRI possible).
  -->
  <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css">
</head>
|
<body> |
|
<gradio-lite> |
|
|
|
<!-- Python packages the embedded app.py imports beyond what the runtime already provides (one requirement per line). -->
<gradio-requirements>
transformers_js_py
</gradio-requirements>
|
|
|
<gradio-file name="app.py" entrypoint> |
|
from transformers_js import pipeline |
|
import gradio as gr |
|
import numpy as np |
|
|
|
speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'; |
|
|
|
synthesizer = await pipeline( |
|
'text-to-speech', |
|
'Xenova/speecht5_tts', |
|
{ "quantized": False } |
|
) |
|
|
|
|
|
async def synthesize(text):
    """Turn ``text`` into speech and return it as 16-bit PCM.

    Args:
        text: The string to speak; passed to the module-level ``synthesizer``
            together with the module-level ``speaker_embeddings`` URL.

    Returns:
        A ``(sampling_rate, samples)`` tuple. ``sampling_rate`` is taken
        as-is from the synthesizer output and ``samples`` is a 1-D
        ``numpy.int16`` array.
    """
    out = await synthesizer(text, {"speaker_embeddings": speaker_embeddings})
    audio_data_memory_view = out["audio"]
    sampling_rate = out["sampling_rate"]

    # Reinterpret the raw buffer as float32 samples.
    audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)

    # Clamp to [-1.0, 1.0] before scaling: a sample even slightly outside that
    # range would exceed the int16 limits after multiplying by 32767 and wrap
    # around on the cast, which is audible as a loud click.
    clipped = np.clip(audio_data, -1.0, 1.0)
    audio_data_16bit = (clipped * 32767).astype(np.int16)

    return sampling_rate, audio_data_16bit
|
|
|
|
|
# Minimal UI: a textbox feeds synthesize(), and its return value drives an
# audio output component.
demo = gr.Interface(fn=synthesize, inputs="textbox", outputs="audio")
demo.launch()
|
</gradio-file> |
|
|
|
</gradio-lite> |
|
|
|
</body> |
|
</html> |
|
|