# Hugging Face Spaces page header captured along with the source (status: Running).
# Imports and eager model construction.
# The statement order below is significant: the name `phonemizer` is first the
# imported package and is later rebound to a model object (see the note below).
import gradio as gr
from collections.abc import Iterable
from gruut import sentences
from phonemap.g2pphonemizer import G2PPhonemizer
from phonemap.charsiu import CharsiuPhonemizer
from openphonemizer import OpenPhonemizer
from cached_path import cached_path

# OpenPhonemizer constructed with its default arguments.
opphonemizer = OpenPhonemizer()
# OpenPhonemizer constructed from the 'autoreg-ckpt' checkpoint path that
# cached_path resolves.
opphonemizer_ar = OpenPhonemizer(
    str(cached_path('hf://openphonemizer/autoreg-ckpt/best_model.pt'))
)
g2p = G2PPhonemizer()
charsiu = CharsiuPhonemizer()

import phonemizer

# Must be created while `phonemizer` still refers to the imported package.
global_phonemizer = phonemizer.backend.EspeakBackend(
    language='en-us',
    preserve_punctuation=True,
    with_stress=True,
    words_mismatch='ignore',
)

from nltk import word_tokenize
import nltk
from dp.phonemizer import Phonemizer

# NOTE: this rebinds the module-level name `phonemizer` from the package
# imported above to the model loaded from the checkpoint; dp() calls this
# object.
phonemizer = Phonemizer.from_checkpoint(
    str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt'))
)

import re
from num2words import num2words
def replace_numbers_with_words(text, converter=None):
    """Replace every run of digits in *text* with its spelled-out form.

    Each maximal run of digits is converted to an ``int`` and handed to
    *converter*; the string it returns is substituted for that run in place.

    Args:
        text: Input string.
        converter: Optional callable taking an ``int`` and returning the
            replacement string. Defaults to the module-level ``num2words``.

    Returns:
        A copy of *text* with each digit run substituted.
    """
    if converter is None:
        converter = num2words
    # Substitute match by match instead of calling str.replace() with the
    # matched text: str.replace() rewrites every occurrence of those digits,
    # including ones that are part of a longer number (the "1" inside "12").
    return re.sub(r'\d+', lambda found: converter(int(found.group())), text)
# Fetch the NLTK tokenizer data needed by word_tokenize() in phonemizerfunc().
# 'punkt' is the legacy resource; NLTK 3.8.2 and later load 'punkt_tab'
# instead, so both are requested to keep tokenization working on either side
# of that change.
nltk.download('punkt')
nltk.download('punkt_tab')
# Borrowed from https://github.com/sidharthrajaram/StyleTTS2/blob/main/src/styletts2/phoneme.py, MIT License
def gruut(text):
    """Phonemize *text* with gruut using ``lang='en-us'``.

    Args:
        text: Input string passed to gruut's ``sentences``.

    Returns:
        The per-word phoneme strings joined by single spaces. Words whose
        ``phonemes`` attribute is neither a string nor an iterable are
        skipped.
    """
    phonemized = []
    for sent in sentences(text, lang='en-us'):
        for word in sent:
            phonemes = word.phonemes
            # Check str before Iterable: a str is itself an Iterable, so with
            # the checks the other way round the str branch can never run.
            if isinstance(phonemes, str):
                phonemized.append(phonemes)
            elif isinstance(phonemes, Iterable):
                phonemized.append(''.join(phonemes))
    return ' '.join(phonemized)
def g2pen(text):
    """Return the result of phonemizing *text* with the module-level ``g2p``."""
    phonemes = g2p.phonemize(text)
    return phonemes
def docharsiu(text):
    """Return the result of phonemizing *text* with the module-level ``charsiu``."""
    phonemes = charsiu.phonemize(text)
    return phonemes
def phonemizerfunc(text):
    """Phonemize *text* with the module-level ``global_phonemizer`` backend.

    Surrounding whitespace and every double-quote character are removed, the
    cleaned text is phonemized as a one-item list, and the first result is
    split with ``word_tokenize`` and re-joined with single spaces.
    """
    cleaned = text.strip().replace('"', '')
    # The backend is given a list of texts; only its first result is used.
    first_result = global_phonemizer.phonemize([cleaned])[0]
    return ' '.join(word_tokenize(first_result))
def dp(text):
    """Spell out the numbers in *text*, then phonemize it with ``lang='en_us'``.

    ``phonemizer`` here is the module-level object created with
    ``Phonemizer.from_checkpoint``, not the ``phonemizer`` package.
    """
    spelled_out = replace_numbers_with_words(text)
    return phonemizer(spelled_out, lang='en_us')
def run_openphonemizer(text):
    """Return the result of calling the module-level ``opphonemizer`` on *text*."""
    phonemes = opphonemizer(text)
    return phonemes
def opar(text):
    """Return the result of calling the module-level ``opphonemizer_ar`` on *text*."""
    phonemes = opphonemizer_ar(text)
    return phonemes
def run(t, r):
    """Phonemize *t* with the phonemizer whose label is *r*.

    Args:
        t: Text to phonemize.
        r: Label of the phonemizer to use, as offered by the radio selector.

    Returns:
        The selected phonemizer's output, or an error message string when *r*
        matches none of the known labels.
    """
    # Labels are compared in this order with ``==``, mirroring a chain of ifs.
    handlers = (
        ('phonemizer', phonemizerfunc),
        ('openphonemizer', run_openphonemizer),
        ('gruut', gruut),
        ('deep_phonemizer', dp),
        ('g2p_en (buggy)', g2pen),
        ('openphonemizer autoregressive', opar),
    )
    for label, handler in handlers:
        if r == label:
            return handler(t)
    return f'Error: Phonemizer {r} not found, please try another Phonemizer and create a Community discussion on HF.'
# Gradio UI: a text input, a phonemizer selector, a button and a read-only
# output box; clicking the button calls run(text, selection).
with gr.Blocks() as demo:
    gr.Markdown("# PhonemizerHub")
    t = gr.Textbox(
        label="Text",
        placeholder="Enter text...",
        interactive=True,
    )
    r = gr.Radio(
        choices=[
            'phonemizer',
            'openphonemizer',
            'openphonemizer autoregressive',
            'gruut',
            'deep_phonemizer',
            'g2p_en (buggy)',
        ],
        value='openphonemizer',
        interactive=True,
        label='Select a phonemizer',
    )
    b = gr.Button("Phonemize")
    out = gr.Textbox(interactive=False, label="Phonemes")
    b.click(run, inputs=[t, r], outputs=[out])

# NOTE(review): indentation was lost in the captured source; the launch call is
# placed after the Blocks context here. Confirm against the original file.
demo.queue().launch()