transformers_js_py from transformers_js import pipeline import gradio as gr import numpy as np speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'; synthesizer = await pipeline( 'text-to-speech', 'Xenova/speecht5_tts', { "quantized": False } ) async def synthesize(text): out = await synthesizer(text, { "speaker_embeddings": speaker_embeddings }); audio_data_memory_view = out["audio"] sampling_rate = out["sampling_rate"] audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32) audio_data_16bit = (audio_data * 32767).astype(np.int16) return sampling_rate, audio_data_16bit demo = gr.Interface(synthesize, "textbox", "audio") demo.launch()