Spaces:

CjangCjengh
/

Shanghainese-TTS

Running

CjangCjengh

upload files

df370f8 about 2 years ago

3.28 kB

	import torch
	import librosa
	import commons
	import utils
	from models import SynthesizerTrn
	from text import text_to_sequence
	import numpy as np
	from mel_processing import spectrogram_torch
	import gradio as gr
	from text.cleaners import shanghainese_cleaners


	# Sample sentence used as the initial value of the text input box in the UI.
	DEFAULT_TEXT='阿拉小人天天辣辣白相，书一眼也勿看，拿我急煞脱了。侬讲是𠲎？'


	def clean_text(text, ipa_input):
	    """Return the text to show in the input box for the given IPA setting.

	    When ``ipa_input`` is truthy the text is passed through
	    ``shanghainese_cleaners`` and that result is returned; otherwise
	    ``text`` is returned unchanged.
	    """
	    return shanghainese_cleaners(text) if ipa_input else text


	def get_text(text, hps, cleaned=False):
	    """Convert ``text`` into a ``torch.LongTensor`` of symbol ids.

	    ``text_to_sequence`` is called with ``hps.symbols`` and either an empty
	    cleaner list (when ``cleaned`` is truthy) or ``hps.data.text_cleaners``.
	    When ``hps.data.add_blank`` is set, ``commons.intersperse`` is applied
	    to the id sequence with the value 0 before the tensor is built.
	    """
	    # An empty cleaner list is used when the caller says the text is already cleaned.
	    cleaner_names = [] if cleaned else hps.data.text_cleaners
	    symbol_ids = text_to_sequence(text, hps.symbols, cleaner_names)
	    if hps.data.add_blank:
	        symbol_ids = commons.intersperse(symbol_ids, 0)
	    return torch.LongTensor(symbol_ids)


	def speech_synthesize(text, cleaned, length_scale):
	    """Synthesize audio for ``text`` and return ``(sampling_rate, audio)``.

	    Newlines are stripped from ``text`` before it is converted to symbol ids
	    with ``get_text``. Inference runs under ``torch.no_grad()`` on the
	    module-level model ``net_g_ms`` with speaker id 0 and fixed noise
	    scales; ``length_scale`` is forwarded to ``infer`` unchanged. Both
	    ``net_g_ms`` and ``hps_ms`` are assigned in the ``__main__`` section, so
	    they must exist before this function is called.

	    Returns a tuple of ``hps_ms.data.sampling_rate`` and the generated audio
	    as a float NumPy array.
	    """
	    single_line = text.replace('\n','')
	    token_ids = get_text(single_line, hps_ms, cleaned)
	    with torch.no_grad():
	        # Add a leading dimension so the single utterance is passed as a batch of one.
	        batch = token_ids.unsqueeze(0)
	        batch_lengths = torch.LongTensor([token_ids.size(0)])
	        speaker_id = torch.LongTensor([0])
	        outputs = net_g_ms.infer(
	            batch,
	            batch_lengths,
	            sid=speaker_id,
	            noise_scale=0.667,
	            noise_scale_w=0.8,
	            length_scale=length_scale,
	        )
	        # First output, indexed at [0, 0], moved to CPU as a float NumPy array.
	        audio = outputs[0][0,0].data.cpu().float().numpy()
	    return (hps_ms.data.sampling_rate, audio)


	if __name__=='__main__':
	    # Load hyper-parameters and build the synthesizer. `hps_ms` and `net_g_ms`
	    # are deliberately module-level names: `speech_synthesize` reads them as globals.
	    hps_ms = utils.get_hparams_from_file('model/config.json')
	    n_speakers = hps_ms.data.n_speakers
	    n_symbols = len(hps_ms.symbols)
	    speakers = hps_ms.speakers

	    net_g_ms = SynthesizerTrn(
	        n_symbols,
	        hps_ms.data.filter_length // 2 + 1,
	        hps_ms.train.segment_size // hps_ms.data.hop_length,
	        n_speakers=n_speakers,
	        **hps_ms.model)
	    # Switch the model to evaluation mode before loading the checkpoint weights.
	    _ = net_g_ms.eval()
	    utils.load_checkpoint('model/model.pth', net_g_ms)

	    # Build the Gradio UI: text box, IPA toggle, speed slider, button and audio output.
	    with gr.Blocks() as app:
	        gr.Markdown('# Shanghainese Text to Speech\n'
	                    '![visitor badge](https://visitor-badge.glitch.me/badge?page_id=cjangcjengh.shanghainese-tts)')
	        gr.Markdown('<center><b>If you don\'t know how to type, see examples <a href="https://huggingface.co/spaces/CjangCjengh/Shanghainese-TTS/raw/main/shanghainese_script.txt">HERE</a></b></center>')
	        text_input = gr.TextArea(label='Text', placeholder='Type your text here',value=DEFAULT_TEXT)
	        # FIX: the initial state of a Checkbox is set through `value`; the
	        # previous `default=True` is not a Checkbox parameter.
	        # NOTE(review): the box is started unchecked, which matches the default
	        # text being ordinary script rather than IPA; `default` was presumably
	        # ignored (Gradio 3.x) or rejected (newer releases) -- confirm against
	        # the pinned Gradio version.
	        cleaned_text = gr.Checkbox(label='IPA Input', value=False)
	        length_scale = gr.Slider(0.5,2,1,step=0.1,label='Speaking Speed',interactive=True)
	        tts_button = gr.Button('Synthesize')
	        audio_output = gr.Audio(label='Speech Synthesized')
	        # Toggling the checkbox rewrites the text box with the result of `clean_text`.
	        cleaned_text.change(clean_text,[text_input,cleaned_text],[text_input])
	        # The button feeds text, IPA flag and speed into `speech_synthesize`.
	        tts_button.click(speech_synthesize,[text_input,cleaned_text,length_scale],[audio_output])
	        gr.Markdown('## Based on\n'
	                    '- [https://github.com/jaywalnut310/vits](https://github.com/jaywalnut310/vits)\n\n'
	                    '## Dataset\n'
	                    '- [http://shh.dict.cn/](http://shh.dict.cn/)\n\n'
	                    '## Lexicon\n'
	                    '- [https://www.wugniu.com/](https://www.wugniu.com/)\n\n'
	                    '- [https://github.com/MaigoAkisame/MCPDict](https://github.com/MaigoAkisame/MCPDict)\n\n'
	                    '- [https://github.com/edward-martyr/rime-yahwe_zaonhe](https://github.com/edward-martyr/rime-yahwe_zaonhe)')

	    app.launch()