Spaces:
Running
Running
ORI-Muchim
committed on
Commit
•
9e53507
1
Parent(s):
37e45ba
Delete text/cleaners.py
Browse files- text/cleaners.py +0 -24
text/cleaners.py
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
from text.korean import latin_to_hangul, number_to_hangul, divide_hangul, korean_to_lazy_ipa, korean_to_ipa
|
3 |
-
|
4 |
-
def cjke_cleaners2(text):
    """Convert language-tagged segments of *text* to IPA and append a period.

    A segment is a span wrapped in a matching pair of ``[ZH]``, ``[JA]``,
    ``[KO]`` or ``[EN]`` markers, e.g. ``[KO]...[KO]``. Each segment,
    including its markers, is replaced by the output of the converter for
    that language followed by a single space. Text outside any tagged
    segment is left untouched.

    Args:
        text: Input string, optionally containing tagged segments.

    Returns:
        The string with every tagged segment replaced and ``'.'`` appended.

    NOTE(review): only ``korean_to_ipa`` is imported in the visible part of
    this file; ``chinese_to_ipa``, ``japanese_to_ipa2`` and
    ``english_to_ipa2`` must be provided elsewhere, otherwise inputs with
    ZH/JA/EN segments raise ``NameError`` -- confirm.
    """
    # Each converter is wrapped in a lambda so its name is looked up only
    # when a segment of that language is actually present.
    converters = (
        ('ZH', lambda segment: chinese_to_ipa(segment)),
        ('JA', lambda segment: japanese_to_ipa2(segment)),
        ('KO', lambda segment: korean_to_ipa(segment)),
        ('EN', lambda segment: english_to_ipa2(segment)),
    )

    # Locate every segment against the unmodified input before any
    # replacement happens; the non-greedy match stops at the first closing
    # marker of the same language.
    located = [
        (convert, re.findall(r'\[%s\].*?\[%s\]' % (tag, tag), text))
        for tag, convert in converters
    ]

    for convert, segments in located:
        for tagged in segments:
            # [4:-4] strips the 4-character opening and closing markers.
            # count=1 replaces only the first remaining occurrence, so
            # repeated identical segments are consumed one at a time.
            text = text.replace(tagged, convert(tagged[4:-4]) + ' ', 1)

    # NOTE(review): the period is appended unconditionally at function level;
    # the original indentation was not recoverable -- confirm this placement.
    text += '.'
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|