guymorlan committed on
Commit
c881be4
1 Parent(s): 87d0c3c

Update translit.py

Browse files
Files changed (1) hide show
  1. translit.py +11 -11
translit.py CHANGED
@@ -94,12 +94,12 @@ arabic_to_english = {
94
  "ُ": "u",
95
  "ِ": "i",
96
  "،": ",",
97
- "ֹ": "", # holam
98
- "ַ": "", # patah
99
- "ִ": "", # hiriq
100
  "ְ": "", # shva
101
- "ֻ": "", # kubutz
102
- 'ֵ': "", # tzere
103
  "ّ": "SHADDA" # shadda
104
  }
105
 
@@ -177,12 +177,12 @@ def to_taatik(arabic):
177
 
178
  def postprocess_arabic_transliteration(text):
179
 
180
- text = re.sub(r'(ā)(?!ā)', 'ā', text)
181
- text = re.sub(r'ⁱy(?!y)', 'ī', text)
182
- text = re.sub(r'ᵒw(?!w)', 'ō', text)
183
- text = re.sub(r'ᵘw(?!w)', 'ū', text)
184
- text = re.sub(r'ᵃy(?!y)', 'ē', text)
185
- text = re.sub(r'ᵃw(?!w)', 'ō', text)
186
  text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
187
 
188
 
 
94
  "ُ": "u",
95
  "ِ": "i",
96
  "،": ",",
97
+ "ֹ": "o", # holam
98
+ "ַ": "a", # patah
99
+ "ִ": "i", # hiriq
100
  "ְ": "", # shva
101
+ "ֻ": "u", # kubutz
102
+ 'ֵ': "e", # tzere
103
  "ّ": "SHADDA" # shadda
104
  }
105
 
 
177
 
178
  def postprocess_arabic_transliteration(text):
179
 
180
+ text = re.sub(r'a(ā)(?!ā)', 'ā', text)
181
+ text = re.sub(r'iy(?!y)', 'ī', text)
182
+ text = re.sub(r'ow(?!w)', 'ō', text)
183
+ text = re.sub(r'uw(?!w)', 'ū', text)
184
+ text = re.sub(r'ay(?!y)', 'ē', text)
185
+ text = re.sub(r'aw(?!w)', 'ō', text)
186
  text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
187
 
188