File size: 3,099 Bytes
8ee2458
a4ffacc
8ee2458
c1d7434
 
676946d
9e6872b
14589ae
9e6872b
676946d
c1d7434
 
8ee2458
 
 
 
 
 
 
6800fe7
 
8ee2458
6800fe7
 
 
 
 
 
 
8ee2458
 
 
 
 
 
 
 
 
 
 
 
 
c1d7434
 
 
 
9a35a6d
8ee2458
 
 
 
 
 
 
6800fe7
8ee2458
676946d
14589ae
9e6872b
 
9a35a6d
 
676946d
9a35a6d
 
64df066
9e6872b
c1d7434
8ee2458
9a35a6d
 
e7b728c
9a35a6d
64df066
9a35a6d
8ee2458
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from collections.abc import Iterable
from gruut import sentences
from phonemap.g2pphonemizer import G2PPhonemizer
from phonemap.charsiu import CharsiuPhonemizer
from openphonemizer import OpenPhonemizer
from cached_path import cached_path
# OpenPhonemizer constructed with no arguments; used by run_openphonemizer().
opphonemizer = OpenPhonemizer()
# Second OpenPhonemizer constructed from the 'autoreg-ckpt' checkpoint path that
# cached_path resolves; used by opar().
opphonemizer_ar = OpenPhonemizer(str(cached_path('hf://openphonemizer/autoreg-ckpt/best_model.pt')))

# Instances used by g2pen() and docharsiu() respectively.
g2p = G2PPhonemizer()
charsiu = CharsiuPhonemizer()
import phonemizer
# eSpeak backend taken from the `phonemizer` module. It must be created here,
# while the name `phonemizer` still refers to the imported module: the name is
# rebound to a different object further down.
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
from nltk import word_tokenize
import nltk

from dp.phonemizer import Phonemizer
# NOTE: this rebinds `phonemizer` from the module imported above to a
# Phonemizer loaded from the en_us_cmudict_ipa_forward checkpoint; dp() calls
# this object. After this line `phonemizer.backend` is no longer the module's.
phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
import re
from num2words import num2words

def replace_numbers_with_words(text):
    """Return *text* with each run of digits replaced by its spelled-out form.

    Every maximal run of one or more digits is converted with
    ``num2words(int(run))`` and substituted at the position where it occurs.

    Args:
        text: Input string, possibly containing digit runs.

    Returns:
        The string with every digit run replaced; the input is returned
        unchanged when it contains no digits.
    """
    # Substitute per match rather than calling str.replace on the whole string:
    # str.replace also rewrites the same digits inside longer numbers (the "1"
    # of a later "12"), which corrupted inputs such as "1 and 12".
    return re.sub(r'\d+', lambda found: num2words(int(found.group())), text)

# Download the NLTK 'punkt' resource when the module is imported; presumably
# required by word_tokenize (used in phonemizerfunc) — TODO confirm against the
# installed NLTK version.
nltk.download('punkt')
# Borrowed from https://github.com/sidharthrajaram/StyleTTS2/blob/main/src/styletts2/phoneme.py, MIT License
def gruut(text):
    """Phonemize *text* with gruut (``lang='en-us'``).

    Iterates over every word of every sentence yielded by ``sentences`` and
    collects each word's phonemes as a single string.

    Args:
        text: The text to phonemize.

    Returns:
        The per-word phoneme strings joined by single spaces. Words whose
        ``phonemes`` attribute is neither a string nor an iterable are skipped.
    """
    phonemized = []
    for sent in sentences(text, lang='en-us'):
        for word in sent:
            phonemes = word.phonemes
            # Check for str first: a str is itself an Iterable, so testing
            # Iterable first left the str branch unreachable.
            if isinstance(phonemes, str):
                phonemized.append(phonemes)
            elif isinstance(phonemes, Iterable):
                phonemized.append(''.join(phonemes))
    return ' '.join(phonemized)
def g2pen(text):
    """Phonemize *text* with the module-level ``g2p`` instance and return the result."""
    phonemes = g2p.phonemize(text)
    return phonemes
def docharsiu(text):
    """Phonemize *text* with the module-level ``charsiu`` instance and return the result."""
    phonemes = charsiu.phonemize(text)
    return phonemes
def phonemizerfunc(text):
    """Phonemize *text* with ``global_phonemizer`` and return space-joined tokens.

    Surrounding whitespace is stripped and double quotes are removed before the
    text is passed, as a one-element list, to ``global_phonemizer.phonemize``.
    The first returned item is split with ``word_tokenize`` and the tokens are
    joined back together with single spaces.
    """
    cleaned = text.strip().replace('"', '')
    first_result = global_phonemizer.phonemize([cleaned])[0]
    return ' '.join(word_tokenize(first_result))
def dp(text):
    """Spell out digit runs in *text*, then phonemize it with ``phonemizer`` (``lang='en_us'``)."""
    spelled_out = replace_numbers_with_words(text)
    phonemes = phonemizer(spelled_out, lang='en_us')
    return phonemes
def run_openphonemizer(text):
    """Phonemize *text* by calling the module-level ``opphonemizer`` instance."""
    phonemes = opphonemizer(text)
    return phonemes
def opar(text):
    """Phonemize *text* by calling the module-level ``opphonemizer_ar`` instance."""
    phonemes = opphonemizer_ar(text)
    return phonemes
def run(t, r):
    """Phonemize *t* with the backend whose label equals *r*.

    Args:
        t: The text to phonemize.
        r: The backend label to use.

    Returns:
        The selected backend's output, or an error message string when *r*
        matches none of the known labels.
    """
    # Ordered (label, thunk) pairs; each thunk looks up its backend function
    # only when it is actually invoked.
    backends = (
        ('phonemizer', lambda: phonemizerfunc(t)),
        ('openphonemizer', lambda: run_openphonemizer(t)),
        ('gruut', lambda: gruut(t)),
        ('deep_phonemizer', lambda: dp(t)),
        ('g2p_en (buggy)', lambda: g2pen(t)),
        ('openphonemizer autoregressive', lambda: opar(t)),
    )
    for label, invoke in backends:
        if r == label:
            return invoke()
    return f'Error: Phonemizer {r} not found, please try another Phonemizer and create a Community discussion on HF.'
# Gradio UI: a text box, a backend selector, a button and a read-only output
# box; clicking the button passes the text and the selected label to run().
with gr.Blocks() as demo:
    gr.Markdown("# PhonemizerHub")
    text_input = gr.Textbox(
        label="Text",
        placeholder="Enter text...",
        interactive=True,
    )
    backend_choice = gr.Radio(
        choices=[
            'phonemizer',
            'openphonemizer',
            'openphonemizer autoregressive',
            'gruut',
            'deep_phonemizer',
            'g2p_en (buggy)',
        ],
        value='openphonemizer',
        interactive=True,
        label='Select a phonemizer',
    )
    phonemize_button = gr.Button("Phonemize")
    phoneme_output = gr.Textbox(interactive=False, label="Phonemes")
    phonemize_button.click(
        run,
        inputs=[text_input, backend_choice],
        outputs=[phoneme_output],
    )
demo.queue().launch()