from pathlib import Path
from typing import Iterable
from typing import List
from typing import Optional
from typing import Union

import g2p_en
from typeguard import check_argument_types

from espnet2.text.abs_tokenizer import AbsTokenizer


def split_by_space(text) -> List[str]:
    return text.split(" ")


def pyopenjtalk_g2p(text) -> List[str]:
    import pyopenjtalk

    # pyopenjtalk.g2p with kana=False returns a single space-separated phoneme string
    phones = pyopenjtalk.g2p(text, kana=False)
    phones = phones.split(" ")
    return phones
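
# Illustrative example for pyopenjtalk_g2p (not from the original source): for input
# such as "こんにちは" it typically returns a phoneme list like
# ["k", "o", "N", "n", "i", "ch", "i", "w", "a"]; the exact symbols depend on the
# installed pyopenjtalk / OpenJTalk dictionary version.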


def pyopenjtalk_g2p_accent(text) -> List[str]:
    import pyopenjtalk
    import re

    phones = []
    for labels in pyopenjtalk.run_frontend(text)[1]:
        # Pull the current phoneme plus accent-related fields (/A: and /F:)
        # out of each HTS full-context label.
        p = re.findall(r"\-(.*?)\+.*?\/A:([0-9\-]+).*?\/F:.*?_([0-9])", labels)
        if len(p) == 1:
            phones += [p[0][0], p[0][2], p[0][1]]
    return phones
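
# Note on pyopenjtalk_g2p_accent (added for clarity, not in the original source):
# each matched label contributes three tokens, [phoneme, digit from the /F: field,
# value from the /A: field], so the returned list interleaves phonemes with their
# accent information, e.g. ["k", "1", "0", "o", "1", "0", ...] (values illustrative).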


def pyopenjtalk_g2p_accent_with_pause(text) -> List[str]:
    import pyopenjtalk
    import re

    phones = []
    for labels in pyopenjtalk.run_frontend(text)[1]:
        # Keep pause labels as an explicit "pau" token instead of dropping them.
        if labels.split("-")[1].split("+")[0] == "pau":
            phones += ["pau"]
            continue
        p = re.findall(r"\-(.*?)\+.*?\/A:([0-9\-]+).*?\/F:.*?_([0-9])", labels)
        if len(p) == 1:
            phones += [p[0][0], p[0][2], p[0][1]]
    return phones


def pyopenjtalk_g2p_kana(text) -> List[str]:
    import pyopenjtalk

    kanas = pyopenjtalk.g2p(text, kana=True)
    return list(kanas)
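
# Illustrative example for pyopenjtalk_g2p_kana (not from the original source):
# with kana=True, pyopenjtalk.g2p returns a katakana string (e.g. "コンニチワ"),
# and list() splits it into per-character tokens such as ["コ", "ン", "ニ", "チ", "ワ"].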


def pypinyin_g2p(text) -> List[str]:
    from pypinyin import pinyin
    from pypinyin import Style

    phones = [phone[0] for phone in pinyin(text, style=Style.TONE3)]
    return phones
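
# Illustrative example for pypinyin_g2p (not from the original source):
# pinyin("你好", style=Style.TONE3) returns [["ni3"], ["hao3"]], so this function
# yields whole syllables with a trailing tone digit, e.g. ["ni3", "hao3"].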


def pypinyin_g2p_phone(text) -> List[str]:
    from pypinyin import pinyin
    from pypinyin import Style
    from pypinyin.style._utils import get_finals
    from pypinyin.style._utils import get_initials

    # Split each TONE3 syllable into its initial and final; empty pieces
    # (e.g. syllables without an initial) are filtered out.
    phones = [
        p
        for phone in pinyin(text, style=Style.TONE3)
        for p in [
            get_initials(phone[0], strict=True),
            get_finals(phone[0], strict=True),
        ]
        if len(p) != 0
    ]
    return phones
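
# Illustrative example for pypinyin_g2p_phone (not from the original source):
# for "你好" this yields roughly ["n", "i3", "h", "ao3"]; the exact finals depend
# on the installed pypinyin version and its strict-mode rules.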


class G2p_en:
    """Wrapper around g2p_en.G2p.

    g2p_en.G2p is not picklable, so it cannot be copied to other processes
    via the multiprocessing module. As a workaround, the underlying
    g2p_en.G2p instance is created lazily on the first call.

    """

    def __init__(self, no_space: bool = False):
        self.no_space = no_space
        self.g2p = None

    def __call__(self, text) -> List[str]:
        if self.g2p is None:
            # Lazy instantiation keeps this wrapper picklable.
            self.g2p = g2p_en.G2p()

        phones = self.g2p(text)
        if self.no_space:
            # Remove the space tokens that g2p_en inserts between words.
            phones = list(filter(lambda s: s != " ", phones))
        return phones
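
# Illustrative usage of G2p_en (hypothetical values, not from the original source):
#   g2p = G2p_en(no_space=True)
#   g2p("Hello world")  ->  e.g. ["HH", "AH0", "L", "OW1", "W", "ER1", "L", "D"]
# Because g2p_en.G2p is only built inside __call__, G2p_en objects can be pickled
# and shipped to multiprocessing workers before their first use.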


class Phonemizer:
    """Phonemizer module for various languages.

    This is a wrapper around https://github.com/bootphon/phonemizer.
    You can define various g2p modules by specifying options for phonemizer.

    See the available options:
    https://github.com/bootphon/phonemizer/blob/master/phonemizer/phonemize.py#L32

    """

    def __init__(
        self,
        word_separator: Optional[str] = None,
        syllable_separator: Optional[str] = None,
        **phonemize_kwargs,
    ):
        from phonemizer import phonemize
        from phonemizer.separator import Separator

        self.phonemize = phonemize
        self.separator = Separator(
            word=word_separator, syllable=syllable_separator, phone=" "
        )
        self.phonemize_kwargs = phonemize_kwargs

    def __call__(self, text) -> List[str]:
        return self.phonemize(
            text,
            separator=self.separator,
            **self.phonemize_kwargs,
        ).split()
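
# Illustrative usage of Phonemizer (hypothetical values, not from the original source):
#   phonemizer = Phonemizer(language="ar", backend="espeak", with_stress=True)
#   phonemizer("some Arabic text")  ->  list of space-separated phones
# All keyword arguments other than the separators are forwarded verbatim to
# phonemizer.phonemize(), so any option supported by the installed phonemizer
# version can be used.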


class PhonemeTokenizer(AbsTokenizer):
    def __init__(
        self,
        g2p_type: Union[None, str],
        non_linguistic_symbols: Union[Path, str, Iterable[str], None] = None,
        space_symbol: str = "<space>",
        remove_non_linguistic_symbols: bool = False,
    ):
        assert check_argument_types()
        if g2p_type is None:
            self.g2p = split_by_space
        elif g2p_type == "g2p_en":
            self.g2p = G2p_en(no_space=False)
        elif g2p_type == "g2p_en_no_space":
            self.g2p = G2p_en(no_space=True)
        elif g2p_type == "pyopenjtalk":
            self.g2p = pyopenjtalk_g2p
        elif g2p_type == "pyopenjtalk_kana":
            self.g2p = pyopenjtalk_g2p_kana
        elif g2p_type == "pyopenjtalk_accent":
            self.g2p = pyopenjtalk_g2p_accent
        elif g2p_type == "pyopenjtalk_accent_with_pause":
            self.g2p = pyopenjtalk_g2p_accent_with_pause
        elif g2p_type == "pypinyin_g2p":
            self.g2p = pypinyin_g2p
        elif g2p_type == "pypinyin_g2p_phone":
            self.g2p = pypinyin_g2p_phone
        elif g2p_type == "espeak_ng_arabic":
            self.g2p = Phonemizer(language="ar", backend="espeak", with_stress=True)
        else:
            raise NotImplementedError(f"Not supported: g2p_type={g2p_type}")

        self.g2p_type = g2p_type
        self.space_symbol = space_symbol
        if non_linguistic_symbols is None:
            self.non_linguistic_symbols = set()
        elif isinstance(non_linguistic_symbols, (Path, str)):
            non_linguistic_symbols = Path(non_linguistic_symbols)
            with non_linguistic_symbols.open("r", encoding="utf-8") as f:
                self.non_linguistic_symbols = set(line.rstrip() for line in f)
        else:
            self.non_linguistic_symbols = set(non_linguistic_symbols)
        self.remove_non_linguistic_symbols = remove_non_linguistic_symbols

    def __repr__(self):
        return (
            f"{self.__class__.__name__}("
            f'g2p_type="{self.g2p_type}", '
            f'space_symbol="{self.space_symbol}", '
            f'non_linguistic_symbols="{self.non_linguistic_symbols}"'
            f")"
        )

    def text2tokens(self, line: str) -> List[str]:
        tokens = []
        while len(line) != 0:
            # Consume a leading non-linguistic symbol as one token (or drop it);
            # otherwise consume a single character.
            for w in self.non_linguistic_symbols:
                if line.startswith(w):
                    if not self.remove_non_linguistic_symbols:
                        tokens.append(line[: len(w)])
                    line = line[len(w) :]
                    break
            else:
                t = line[0]
                tokens.append(t)
                line = line[1:]

        line = "".join(tokens)
        tokens = self.g2p(line)
        return tokens

    def tokens2text(self, tokens: Iterable[str]) -> str:
        # Phoneme conversion is not invertible, so simply concatenate the tokens.
        return "".join(tokens)