import io

import gradio as gr
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, SeamlessM4TModel


# Hugging Face checkpoint shared by the processor and the model below.
_CHECKPOINT = "facebook/hf-seamless-m4t-medium"

# NOTE: despite its name, `tokenizer` holds whatever AutoProcessor returns
# for the checkpoint, not necessarily a bare tokenizer.
tokenizer = AutoProcessor.from_pretrained(_CHECKPOINT)
model = SeamlessM4TModel.from_pretrained(_CHECKPOINT)

# Sample English sentence kept as a module-level value.
text = "some example text in the English language"


def greet(text, src_lang="eng", tgt_lang="eng"):
    """Synthesize speech for ``text`` and return it as WAV-encoded bytes.

    Args:
        text: Sentence to convert to speech.
        src_lang: Language code of ``text``, forwarded to the processor.
        tgt_lang: Language code of the generated speech, forwarded to
            ``model.generate``.

    Returns:
        bytes: The contents of a WAV file holding the generated waveform,
        sampled at ``model.config.sampling_rate``.
    """
    inputs = tokenizer(text, src_lang=src_lang, return_tensors="pt")

    with torch.no_grad():
        # A plain forward pass does not yield audio; `generate` runs the
        # text-to-speech path and returns the waveform as its first element.
        waveform = model.generate(**inputs, tgt_lang=tgt_lang)[0]

    # scipy expects a NumPy array rather than a torch tensor; squeeze drops
    # the leading batch dimension of the single-item batch.
    samples = waveform.cpu().numpy().squeeze()

    # Encode in memory: avoids a shared "tmp.wav" that concurrent requests
    # could overwrite, and leaves no unclosed file handle behind.
    buffer = io.BytesIO()
    wavfile.write(buffer, rate=model.config.sampling_rate, data=samples)
    return buffer.getvalue()


# Wire `greet` into a Gradio interface: a "text" input and an "audio" output.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="audio",
)

# Start serving the interface.
iface.launch()