File size: 1,517 Bytes
871432d bde6902 871432d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
from ruaccent import RUAccent
from transformers import AutoTokenizer, VitsModel, set_seed
# Checkpoint id shared by the acoustic model and its tokenizer.
_CHECKPOINT = "utrobinmv/tts_ru_free_hf_vits_low_multispeaker"

# Voice label shown in the UI -> index passed to the model as ``speaker_id``.
speakers = {
    "woman": 0,
    "man": 1,
}

model = VitsModel.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# The model is only used for inference, so switch it to evaluation mode.
model.eval()

# Seeded once, at import time. NOTE(review): later requests draw from the
# advancing RNG state, so repeated calls are not re-seeded -- confirm that
# this is the intended reproducibility guarantee.
set_seed(555)

# Text pre-processor applied to the input before tokenization
# (presumably adds Russian stress marks -- see the ruaccent docs).
accentizer = RUAccent()
accentizer.load(omograph_model_size="turbo", use_dictionary=True)
def generate_audio(speaker_name: str, text: str) -> str:
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        speaker_name: Key of the module-level ``speakers`` mapping; the
            mapped value is passed to the model as ``speaker_id``.
        text: Text to synthesize. It is run through
            ``accentizer.process_all`` before being tokenized.

    Returns:
        Path of a newly created ``.wav`` file containing the generated
        waveform, written at ``model.config.sampling_rate``.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
        KeyError: If ``speaker_name`` is not a key of ``speakers``.
    """
    # Reject blank input up front instead of feeding it to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    accented = accentizer.process_all(text)
    inputs = tokenizer(accented, return_tensors="pt")
    inputs["speaker_id"] = speakers[speaker_name]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # Give every request its own file. A fixed "output.wav" would be
    # overwritten when two requests are served at the same time.
    # delete=False because the caller reads the file after we return; the
    # file is therefore left in the temp directory.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    scipy.io.wavfile.write(
        wav_path,
        rate=model.config.sampling_rate,
        data=waveform[0].cpu().numpy(),
    )
    return wav_path
# Voice selector. The options are the keys of ``speakers``, passed as an
# explicit list rather than relying on dict iteration inside Gradio.
speaker_dropdown = gr.Dropdown(
    choices=list(speakers),
    label="Speaker id",
    value="woman",
    # Derived from the mapping so the hint stays correct if voices change.
    info=f"Models are trained on {len(speakers)} speakers",
    interactive=True,
)

# Web UI: (speaker, text) -> generate_audio -> audio player.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[speaker_dropdown, "text"],
    outputs="audio",
    title="Text to Speech Russian free multispeaker model",
    description=(
        "Введите текст на русском языке, чтобы преобразовать его в русскую звуковую речь. "
        "Пример текста: Привет, как дела? А у тебя как?"
    ),
)

# Start the app and request a public share link.
iface.launch(share=True)
|