Spaces:
Sleeping
Sleeping
File size: 977 Bytes
7694c84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# Marker tokens. Each one is also a member of `symbols` below.
PADDING_TOKEN = "_pad_"
EOS_TOKEN = "_eos_"
DOUBLING_TOKEN = "_dbl_"
SEPARATOR_TOKEN = "_+_"

# The word separator and the eos-token, grouped in that order.
EOS_TOKENS = [SEPARATOR_TOKEN, EOS_TOKEN]

# Full symbol inventory: special tokens, then consonants, then vowels.
# The three groups are concatenated into one flat list; element order is
# significant because it fixes each symbol's list index.
symbols = (
    # --- special tokens ---
    [
        PADDING_TOKEN,    # padding
        EOS_TOKEN,        # eos-token
        "_sil_",          # silence
        DOUBLING_TOKEN,   # doubling
        SEPARATOR_TOKEN,  # word separator
    ]
    # --- consonants ---
    + [
        "<",  # hamza
        "b",  # baa'
        "t",  # taa'
        "^",  # thaa'
        "j",  # jiim
        "H",  # Haa'
        "x",  # xaa'
        "d",  # daal
        "*",  # dhaal
        "r",  # raa'
        "z",  # zaay
        "s",  # siin
        "$",  # shiin
        "S",  # Saad
        "D",  # Daad
        "T",  # Taa'
        "Z",  # Zhaa'
        "E",  # 3ayn
        "g",  # ghain
        "f",  # faa'
        "q",  # qaaf
        "k",  # kaaf
        "l",  # laam
        "m",  # miim
        "n",  # nuun
        "h",  # haa'
        "w",  # waaw
        "y",  # yaa'
        "v",  # /v/ for loanwords e.g. in u'fydyw': u'v i0 d y uu1',
    ]
    # --- vowels ---
    + [
        "a", "u", "i",     # short
        "aa", "uu", "ii",  # long
    ]
)
|