PhonemizerHub / app.py
mrfakename's picture
update
e7b728c verified
import gradio as gr
from collections.abc import Iterable
from gruut import sentences
from phonemap.g2pphonemizer import G2PPhonemizer
from phonemap.charsiu import CharsiuPhonemizer
from openphonemizer import OpenPhonemizer
from cached_path import cached_path
# Module-level phonemizer instances, created once at import time and reused by the
# wrapper functions defined further down in this file.
# OpenPhonemizer constructed with no arguments (used by run_openphonemizer).
opphonemizer = OpenPhonemizer()
# OpenPhonemizer constructed from the 'autoreg-ckpt' checkpoint path resolved by
# cached_path (used by opar, the 'openphonemizer autoregressive' option).
opphonemizer_ar = OpenPhonemizer(str(cached_path('hf://openphonemizer/autoreg-ckpt/best_model.pt')))
# Backend behind the 'g2p_en (buggy)' option (used by g2pen).
g2p = G2PPhonemizer()
# NOTE(review): only referenced by docharsiu(), which run() does not dispatch to
# anywhere in this file -- confirm whether it is still needed.
charsiu = CharsiuPhonemizer()
import phonemizer
# Espeak backend from the `phonemizer` package, configured for 'en-us' with
# preserve_punctuation and with_stress enabled and words_mismatch set to 'ignore'.
# It is created here, before the name `phonemizer` is rebound further down the file.
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
from nltk import word_tokenize
import nltk
from dp.phonemizer import Phonemizer
# DeepPhonemizer model built from a checkpoint whose URL is resolved through cached_path.
# NOTE: this assignment rebinds the module-level name `phonemizer`, shadowing the
# `phonemizer` package imported earlier. `global_phonemizer` was already constructed
# from that package, and dp() calls the object bound here.
phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
import re
from num2words import num2words
def replace_numbers_with_words(text):
    """Return *text* with every run of digits replaced by its spelled-out form.

    Each match of ``\\d+`` is converted with ``num2words(int(match))`` and
    substituted at the position where it was found.

    A single ``re.sub`` pass is used instead of calling ``str.replace`` once per
    match: ``str.replace`` rewrites *every* occurrence of the digit string,
    including occurrences inside longer numbers, so an input such as
    ``"1 and 10"`` used to come out as ``"one and one0"``.

    Args:
        text: The input string.

    Returns:
        The string with digit runs replaced; unchanged if it contains no digits.
    """
    return re.sub(r'\d+', lambda m: num2words(int(m.group())), text)
# Fetch the NLTK 'punkt' resource at import time.
# NOTE(review): presumably required by word_tokenize() in phonemizerfunc; newer NLTK
# releases may expect 'punkt_tab' instead -- confirm against the pinned NLTK version.
nltk.download('punkt')
# Borrowed from https://github.com/sidharthrajaram/StyleTTS2/blob/main/src/styletts2/phoneme.py, MIT License
def gruut(text):
    """Phonemize *text* with gruut and return the phonemes as one string.

    Iterates over the sentences produced by ``sentences(text, lang='en-us')``
    and over each word in them. A word's ``phonemes`` value is used as-is when
    it is already a string, or concatenated without a separator when it is any
    other iterable. Words whose ``phonemes`` is neither (for example ``None``)
    are skipped. The per-word results are joined with single spaces.

    Args:
        text: The input string.

    Returns:
        Space-separated phoneme strings, one per word that had phonemes.
    """
    phonemized = []
    for sent in sentences(text, lang='en-us'):
        for word in sent:
            phonemes = word.phonemes
            # The str check must come first: str is itself an Iterable, so testing
            # Iterable first made the str branch unreachable.
            if isinstance(phonemes, str):
                phonemized.append(phonemes)
            elif isinstance(phonemes, Iterable):
                phonemized.append(''.join(phonemes))
    return ' '.join(phonemized)
def g2pen(text):
    """Return the result of phonemizing *text* with the module-level ``g2p`` instance."""
    phonemes = g2p.phonemize(text)
    return phonemes
def docharsiu(text):
    """Return the result of phonemizing *text* with the module-level ``charsiu`` instance."""
    phonemes = charsiu.phonemize(text)
    return phonemes
def phonemizerfunc(text):
    """Phonemize *text* with ``global_phonemizer`` and re-space the result.

    Surrounding whitespace is stripped and every double-quote character is
    removed before phonemizing. The phonemized string is then split with
    ``word_tokenize`` and the tokens are joined back with single spaces.
    """
    cleaned = text.strip().replace('"', '')
    # A one-element list is passed in and the first entry of the result is read back.
    phonemized = global_phonemizer.phonemize([cleaned])[0]
    tokens = word_tokenize(phonemized)
    return ' '.join(tokens)
def dp(text):
    """Phonemize *text* with the module-level ``phonemizer`` object using ``lang='en_us'``.

    Digit runs are first spelled out via ``replace_numbers_with_words``.
    """
    spelled_out = replace_numbers_with_words(text)
    return phonemizer(spelled_out, lang='en_us')
def run_openphonemizer(text):
    """Return the result of calling the module-level ``opphonemizer`` on *text*."""
    phonemes = opphonemizer(text)
    return phonemes
def opar(text):
    """Return the result of calling the module-level ``opphonemizer_ar`` on *text*."""
    phonemes = opphonemizer_ar(text)
    return phonemes
def run(t, r):
    """Dispatch text *t* to the phonemizer selected by label *r*.

    Args:
        t: The text to phonemize.
        r: The label of the selected phonemizer.

    Returns:
        The selected phonemizer's output, or an error message string when *r*
        matches none of the known labels.
    """
    # Ordered (label, handler) pairs. The lambdas defer the lookup of each wrapper
    # function until its label is actually selected.
    handlers = (
        ('phonemizer', lambda text: phonemizerfunc(text)),
        ('openphonemizer', lambda text: run_openphonemizer(text)),
        ('gruut', lambda text: gruut(text)),
        ('deep_phonemizer', lambda text: dp(text)),
        ('g2p_en (buggy)', lambda text: g2pen(text)),
        ('openphonemizer autoregressive', lambda text: opar(text)),
    )
    for label, handler in handlers:
        if r == label:
            return handler(t)
    return f'Error: Phonemizer {r} not found, please try another Phonemizer and create a Community discussion on HF.'
# Gradio UI: a text input, a phonemizer selector, a button, and a read-only output box.
with gr.Blocks() as demo:
    gr.Markdown("# PhonemizerHub")
    t = gr.Textbox(
        label="Text",
        placeholder="Enter text...",
        interactive=True,
    )
    r = gr.Radio(
        choices=[
            'phonemizer',
            'openphonemizer',
            'openphonemizer autoregressive',
            'gruut',
            'deep_phonemizer',
            'g2p_en (buggy)',
        ],
        value='openphonemizer',
        interactive=True,
        label='Select a phonemizer',
    )
    b = gr.Button("Phonemize")
    out = gr.Textbox(interactive=False, label="Phonemes")
    # Clicking the button calls run(text, selected_label) and writes the result to `out`.
    b.click(run, inputs=[t, r], outputs=[out])
# Enable queuing on the app, then start it.
demo.queue().launch()