# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/text_processing/symbols.py
# Symbol inventory and the lookup tables derived from it.
#
# The order of the characters below defines the integer id of every symbol,
# so characters must not be reordered, inserted, or removed without
# accepting that all ids after the change point will shift.

# Padding symbol; it is placed first in SYMBOLS and therefore has id 0.
PAD = "_"
# Punctuation marks; the final character of this string is the space.
PUNCTUATION = ';:,.!?¡¿—…"«»“” '
# ASCII letters, upper case followed by lower case.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# IPA characters, modifiers and arrows.
# NOTE(review): the ASCII apostrophe looks like it occurs twice near the end
# of this string. If so, SYMBOLS holds a duplicate entry and SYMBOL_TO_ID
# keeps the later index for it. Left untouched so existing ids do not shift.
IPA_LETTERS = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

# Full ordered symbol list: pad, then punctuation, letters and IPA characters.
SYMBOLS = [PAD, *PUNCTUATION, *LETTERS, *IPA_LETTERS]

# Id of the space character (first occurrence in SYMBOLS).
SPACE_ID = SYMBOLS.index(" ")

# Symbol -> id. For a symbol that occurs more than once, the last index wins.
SYMBOL_TO_ID = {symbol: index for index, symbol in enumerate(SYMBOLS)}
# Id -> symbol, one entry per position in SYMBOLS.
ID_TO_SYMBOL = dict(enumerate(SYMBOLS))