Spaces:
Running
Running
ORI-Muchim
committed on
Commit
•
bae2ee4
1
Parent(s):
155155e
Upload symbols.py
Browse files- text/symbols.py +75 -0
text/symbols.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''
Defines the set of symbols used in text input to the model.

Exactly one cleaner's symbol set is active at a time; in this file it is the
``japanese_cleaners2`` set. The sets for the other cleaners are kept inside
bare triple-quoted strings, which Python evaluates and discards, so they have
no effect at runtime.

Module-level names:
    symbols:  list of single-character strings, built as
              [_pad] + punctuation characters + letter characters.
    SPACE_ID: index of the space character " " within ``symbols``.
'''

'''# japanese_cleaners
_pad = '_'
_punctuation = ',.!?-'
_letters = 'AEINOQUabdefghijkmnoprstuvwyzʃʧ↓↑ '
'''

# japanese_cleaners2
# This is the only set that is actually assigned (not wrapped in a string).
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'AEINOQUabdefghijkmnoprstuvwyzʃʧʦ↓↑ '


'''# korean_cleaners
_pad = '_'
_punctuation = ',.!?…~'
_letters = 'ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ '
'''

'''# chinese_cleaners
_pad = '_'
_punctuation = ',。!?—…'
_letters = 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩˉˊˇˋ˙ '
'''

'''# zh_ja_mixture_cleaners
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'AEINOQUabdefghijklmnoprstuvwyzʃʧʦɯɹəɥ⁼ʰ`→↓↑ '
'''

'''# sanskrit_cleaners
_pad = '_'
_punctuation = '।'
_letters = 'ँंःअआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसहऽािीुूृॄेैोौ्ॠॢ '
'''

'''# cjks_cleaners
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'NQabdefghijklmnopstuvwxyzʃʧʥʦɯɹəɥçɸɾβŋɦː⁼ʰ`^#*=→↓↑ '
'''

'''# thai_cleaners
_pad = '_'
_punctuation = '.!? '
_letters = 'กขฃคฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลวศษสหฬอฮฯะัาำิีึืุูเแโใไๅๆ็่้๊๋์'
'''

'''# cjke_cleaners2
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'NQabdefghijklmnopstuvwxyzɑæʃʑçɯɪɔɛɹðəɫɥɸʊɾʒθβŋɦ⁼ʰ`^#*=ˈˌ→↓↑ '
'''

'''# shanghainese_cleaners
_pad = '_'
_punctuation = ',.!?…'
_letters = 'abdfghiklmnopstuvyzøŋȵɑɔɕəɤɦɪɿʑʔʰ̩̃ᴀᴇ15678 '
'''

'''# chinese_dialect_cleaners
_pad = '_'
_punctuation = ',.!?~…─'
_letters = '#Nabdefghijklmnoprstuvwxyzæçøŋœȵɐɑɒɓɔɕɗɘəɚɛɜɣɤɦɪɭɯɵɷɸɻɾɿʂʅʊʋʌʏʑʔʦʮʰʷˀː˥˦˧˨˩̥̩̃̚αᴀᴇ↑↓∅ⱼ '
'''

# Export all symbols:
# The pad symbol comes first (index 0), then each punctuation character,
# then each letter character, one list entry per character.
symbols = [_pad] + list(_punctuation) + list(_letters)

# Special symbol ids
# list.index raises ValueError if " " is absent, so the active set must contain
# a space (here it is the last character of _letters).
SPACE_ID = symbols.index(" ")