Spaces:
Running
Running
Update translit.py
Browse files- translit.py +11 -11
translit.py
CHANGED
@@ -94,12 +94,12 @@ arabic_to_english = {
|
|
94 |
"ُ": "u",
|
95 |
"ِ": "i",
|
96 |
"،": ",",
|
97 |
-
"ֹ": "
|
98 |
-
"ַ": "
|
99 |
-
"ִ": "
|
100 |
"ְ": "", # shva
|
101 |
-
"ֻ": "
|
102 |
-
'ֵ': "
|
103 |
"ّ": "SHADDA" # shadda
|
104 |
}
|
105 |
|
@@ -177,12 +177,12 @@ def to_taatik(arabic):
|
|
177 |
|
178 |
def postprocess_arabic_transliteration(text):
|
179 |
|
180 |
-
text = re.sub(r'
|
181 |
-
text = re.sub(r'
|
182 |
-
text = re.sub(r'
|
183 |
-
text = re.sub(r'
|
184 |
-
text = re.sub(r'
|
185 |
-
text = re.sub(r'
|
186 |
text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
|
187 |
|
188 |
|
|
|
94 |
"ُ": "u",
|
95 |
"ِ": "i",
|
96 |
"،": ",",
|
97 |
+
"ֹ": "o", # holam
|
98 |
+
"ַ": "a", # patah
|
99 |
+
"ִ": "i", # hiriq
|
100 |
"ְ": "", # shva
|
101 |
+
"ֻ": "u", # kubutz
|
102 |
+
'ֵ': "e", # tzere
|
103 |
"ّ": "SHADDA" # shadda
|
104 |
}
|
105 |
|
|
|
177 |
|
178 |
def postprocess_arabic_transliteration(text):
|
179 |
|
180 |
+
text = re.sub(r'a(ā)(?!ā)', 'ā', text)
|
181 |
+
text = re.sub(r'iy(?!y)', 'ī', text)
|
182 |
+
text = re.sub(r'ow(?!w)', 'ō', text)
|
183 |
+
text = re.sub(r'uw(?!w)', 'ū', text)
|
184 |
+
text = re.sub(r'ay(?!y)', 'ē', text)
|
185 |
+
text = re.sub(r'aw(?!w)', 'ō', text)
|
186 |
text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
|
187 |
|
188 |
|