|
# Katakana spellings (keys) and the hiragana text that replaces each of them.
formal_to_informal = dict([
    ("ワタクシ", "わたし"),
    ("チカコ", "しゅうこ"),
    ("タノヒト", "ほかのひと"),
])
|
|
|
# Hiragana spellings (keys) and the hiragana text that replaces each of them.
formal_to_informal2 = dict([
    ("たのひと", "ほかのひと"),
    ("すうは", "かずは"),
])
|
|
|
# Literal substring -> replacement table: symbols are spelled out or folded to
# ASCII, and kanji words/names are given an explicit kana reading.
formal_to_informal3 = {
    # Symbols. The table previously listed "$" and "#" twice and mapped "%" to
    # itself; the full-width forms are listed explicitly so both widths are
    # handled and no entry is a duplicate or a no-op.
    "%": "%",
    "@": "あっとさいん",
    "@": "あっとさいん",
    "$": "どる",
    "$": "どる",
    "#": "はっしゅたぐ",
    "#": "はっしゅたぐ",

    # 何 + particle, read with "なに".
    "何が": "なにが",
    "何も": "なにも",
    "何か": "なにか",
    # Was "なにが", which silently changed the particle は into が.
    "何は": "なには",
    "お父様": "おとうさま",
    "お兄様": "おにいさま",
    "何を": "なにを",

    "良い": "いい",
    "李衣菜": "りいな",
    "志希": "しき",
    "種": "たね",
    "方々": "かたがた",
    "颯": "はやて",
    "茄子さん": "かこさん",
    "茄子ちゃん": "かこちゃん",
    "涼ちゃん": "りょうちゃん",
    "涼さん": "りょうさん",
    "紗枝": "さえ",
    "文香": "ふみか",
    "私": "わたし",
    "周子": "しゅうこ",
    "イェ": "いえ",
    "可憐": "かれん",
    "加蓮": "かれん",
    "・": ".",

    "唯さん": "ゆいさん",
    "唯ちゃん": "ゆいちゃん",
    "聖ちゃん": "ひじりちゃん",
    "他の": "ほかの",
    "他に": "ほかに",
    "一生懸命": "いっしょうけんめい",
    "楓さん": "かえでさん",
    "楓ちゃん": "かえでちゃん",
    "内から": "ないから",
    "の下で": "のしたで",
}
|
|
|
|
|
# Literal substring -> kana reading table for words whose reading is fixed here.
mapper = {
    "仕方": "しかた",
    "明日": "あした",
    "私": "わたし",
    "従妹": "いとこ",

    "1人": "ひとり",
    "2人": "ふたり",

    "一期": "いちご",
    "一会": "いちえ",

    "♪": "!",
    # This entry used to map "?" to itself, which does nothing; the key is the
    # full-width question mark so it is folded to the ASCII one.
    "?": "?",

    "どんな方": "どんなかた",
    "ふたり暮らし": "ふたりぐらし",

    # Compounds ending in 年.
    "新年": "しんねん",
    "来年": "らいねん",
    "去年": "きょねん",
    "壮年": "そうねん",
    "今年": "ことし",

    "昨年": "さくねん",
    "本年": "ほんねん",
    "平年": "へいねん",
    "閏年": "うるうどし",
    "初年": "しょねん",
    "少年": "しょうねん",
    "多年": "たねん",
    "青年": "せいねん",
    "中年": "ちゅうねん",
    "老年": "ろうねん",
    "成年": "せいねん",
    "幼年": "ようねん",
    "前年": "ぜんねん",
    "元年": "がんねん",
    "経年": "けいねん",
    "当年": "とうねん",

    "明年": "みょうねん",
    "歳年": "さいねん",
    "数年": "すうねん",
    "半年": "はんとし",
    "後年": "こうねん",
    "実年": "じつねん",
    "年年": "ねんねん",
    "連年": "れんねん",
    "暦年": "れきねん",
    "各年": "かくねん",
    "全年": "ぜんねん",

    # 年 followed by a particle is read "とし".
    "年を": "としを",
    "年が": "としが",
    "年も": "としも",
    "年は": "としは",

    "奏ちゃん": "かなでちゃん",
    "負けず嫌い": "まけずぎらい",
    "貴方": "あなた",
    "貴女": "あなた",
    "貴男": "あなた",

    "その節": "そのせつ",

    "何し": "なにし",
    "何する": "なにする",

    "心さん": "しんさん",
    "心ちゃん": "しんちゃん",

    "乃々": "のの",

    "身体の": "からだの",
    "身体が": "からだが",
    "身体を": "からだを",
    "身体は": "からだは",
    "身体に": "からだに",
    "正念場": "しょうねんば",
    "言う": "いう",

    "一回": "いっかい",
    "一曲": "いっきょく",
    "一日": "いちにち",
    "一言": "ひとこと",
    "一杯": "いっぱい",

    "方が": "ほうが",
    "縦輪城": "じゅうりんしろ",
    "深息": "しんそく",
    "家人": "かじん",
    "お返し": "おかえし",
    "化物語": "ばけものがたり",
    "阿良々木暦": "あららぎこよみ",
    "何より": "なにより",
}
|
|
|
|
|
|
|
# One combined lookup built from the four tables defined earlier in this file.
# When a key occurs in more than one table, the value from the later table wins
# while the key keeps the position of its first occurrence.
all_transformations = {
    source: target
    for table in (formal_to_informal, formal_to_informal2, formal_to_informal3, mapper)
    for source, target in table.items()
}
|
|
|
def apply_transformations(text, transformations=all_transformations):
    """Return ``text`` after applying every entry of ``transformations``.

    Each key is replaced by its value with ``str.replace``, one key after the
    other in the mapping's iteration order, so later keys are matched against
    the output of the earlier replacements.

    Args:
        text: The string to rewrite.
        transformations: Mapping of substring -> replacement. Defaults to the
            module-level ``all_transformations`` table.

    Returns:
        The rewritten string.
    """
    rewritten = text
    for source, target in transformations.items():
        rewritten = rewritten.replace(source, target)
    return rewritten
|
|
|
import re |
|
|
|
def number_to_japanese(num):
    """Return the kana reading of an integer between 0 and 9999.

    Args:
        num: The number to read out.

    Returns:
        The reading as a string ("ゼロ" for 0), or the literal string
        "Invalid input" when ``num`` is not an ``int`` (``bool`` included) or
        lies outside 0..9999.
    """
    # bool is a subclass of int; reject it explicitly so True/False are not
    # read out as 1/0.
    if isinstance(num, bool) or not isinstance(num, int) or not 0 <= num <= 9999:
        return "Invalid input"

    if num == 0:
        return "ゼロ"

    # Index 0 of every table is the empty string, so an absent digit simply
    # contributes nothing to the concatenation below.
    digits = ["", "いち", "に", "さん", "よん", "ご", "ろく", "なな", "はち", "きゅう"]
    tens = ["", "じゅう", "にじゅう", "さんじゅう", "よんじゅう", "ごじゅう", "ろくじゅう", "ななじゅう", "はちじゅう", "きゅうじゅう"]
    hundreds = ["", "ひゃく", "にひゃく", "さんびゃく", "よんひゃく", "ごひゃく", "ろっぴゃく", "ななひゃく", "はっぴゃく", "きゅうひゃく"]
    thousands = ["", "せん", "にせん", "さんぜん", "よんせん", "ごせん", "ろくせん", "ななせん", "はっせん", "きゅうせん"]

    thousands_digit, remainder = divmod(num, 1000)
    hundreds_digit, remainder = divmod(remainder, 100)
    tens_digit, ones_digit = divmod(remainder, 10)

    return (
        thousands[thousands_digit]
        + hundreds[hundreds_digit]
        + tens[tens_digit]
        + digits[ones_digit]
    )
|
|
|
def convert_numbers_in_string(input_string):
    """Replace every run of digits in ``input_string`` with its kana reading.

    Each ``\\d+`` match is converted with ``int`` and passed to
    ``number_to_japanese``. Numbers that function cannot read (it answers with
    the string "Invalid input", e.g. for values above 9999) are left in the
    text as the original digits instead of having that error text spliced in.

    Args:
        input_string: Text that may contain digit sequences.

    Returns:
        The text with readable numbers spelled out.
    """
    def replace_with_japanese(match):
        digit_run = match.group()
        reading = number_to_japanese(int(digit_run))
        # Keep the digits when the number is outside the supported range.
        return digit_run if reading == "Invalid input" else reading

    return re.sub(r'\d+', replace_with_japanese, input_string)
|
|
|
|
|
# Romaji -> IPA replacement table.
# Roma2IPA walks this table in insertion order with str.replace, so the order
# of the entries is part of the behaviour and is kept exactly as it was.
# Keys that were listed more than once (always with the same value) appear once,
# at the position of their first occurrence.
# NOTE(review): the bare vowels come before the consonant+vowel entries, so by
# the time e.g. "ru", "fu" or "yu" is tried every "u" has already become "ɯ"
# and those keys can no longer match — confirm whether that is intended.
roma_mapper = {
    # Palatalised onsets and digraphs.
    "my": "mʲ", "by": "bʲ", "ny": "nʲ", "ry": "rʲ", "si": "sʲ",
    "ky": "kʲ", "gy": "gʲ", "dy": "dʲ", "di": "dʲ", "fi": "fʲ", "fy": "fʲ",
    "ch": "tɕ", "sh": "ɕ",

    # Plain vowels.
    "a": "a", "i": "i", "u": "ɯ", "e": "e", "o": "o",

    # Consonant + vowel rows.
    "ka": "ka", "ki": "ki", "ku": "kɯ", "ke": "ke", "ko": "ko",
    "sa": "sa", "shi": "ɕi", "su": "sɯ", "se": "se", "so": "so",
    "ta": "ta", "chi": "tɕi", "tsu": "tsɯ", "te": "te", "to": "to",
    "na": "na", "ni": "ni", "nu": "nɯ", "ne": "ne", "no": "no",
    "ha": "ha", "hi": "çi", "fu": "ɸɯ", "he": "he", "ho": "ho",
    "ma": "ma", "mi": "mi", "mu": "mɯ", "me": "me", "mo": "mo",
    "ra": "ɽa", "ri": "ɽi", "ru": "ɽɯ", "re": "ɽe", "ro": "ɽo",
    "ga": "ga", "gi": "gi", "gu": "gɯ", "ge": "ge", "go": "go",
    "za": "za", "ji": "dʑi", "zu": "zɯ", "ze": "ze", "zo": "zo",
    "da": "da", "de": "de", "do": "do",
    "ba": "ba", "bi": "bi", "bu": "bɯ", "be": "be", "bo": "bo",
    "pa": "pa", "pi": "pi", "pu": "pɯ", "pe": "pe", "po": "po",
    "ya": "ja", "yu": "jɯ", "yo": "jo",
    "wa": "wa",
    "wo": "o",
}
|
|
|
# "ɴ" + following consonant -> replacement pair, consumed by nasal_mapper.
nasal_sound = {
    # Before m / b / p.
    "ɴm": "mm",
    "ɴb": "mb",
    "ɴp": "mp",

    # Before k / g.
    "ɴk": "ŋk",
    "ɴg": "ŋg",

    # Before t / d / n / s / z / ɽ.
    "ɴt": "nt",
    "ɴd": "nd",
    "ɴn": "nn",
    "ɴs": "ns",
    "ɴz": "nz",
    "ɴɽ": "nɽ",

    # Before ɲ.
    "ɴɲ": "ɲɲ",
}
|
|
|
def Roma2IPA(text):
    """Rewrite the romaji in ``text`` using the ``roma_mapper`` table.

    Every key of ``roma_mapper`` is replaced by its value with ``str.replace``,
    one key at a time in the table's insertion order, so a later key is matched
    against text that earlier keys have already rewritten.

    Args:
        text: The string to convert.

    Returns:
        The converted string.
    """
    for romaji, ipa in roma_mapper.items():
        text = text.replace(romaji, ipa)
    return text
|
|
|
def nasal_mapper(text):
    """Apply every ``nasal_sound`` replacement to ``text``.

    Each "ɴ" + consonant key of ``nasal_sound`` is replaced by its value with
    ``str.replace``, in the table's insertion order.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for sequence, replacement in nasal_sound.items():
        text = text.replace(sequence, replacement)
    return text
|
|
|
# Katakana reading of each upper-case ASCII letter; built once instead of on
# every call.
_ALPHABET_READINGS = {
    "A": "エイ",
    "B": "ビー",
    "C": "シー",
    "D": "ディー",
    "E": "イー",
    "F": "エフ",
    "G": "ジー",
    "H": "エイチ",
    "I": "アイ",
    "J": "ジェイ",
    "K": "ケイ",
    "L": "エル",
    "M": "エム",
    "N": "エヌ",
    "O": "オー",
    "P": "ピー",
    "Q": "キュー",
    "R": "アール",
    "S": "エス",
    "T": "ティー",
    "U": "ユー",
    "V": "ヴィー",
    "W": "ダブリュー",
    "X": "エックス",
    "Y": "ワイ",
    "Z": "ゼッド",
}


def alphabetreading(text):
    """Spell out the Latin letters in ``text`` with their katakana readings.

    Letters are matched case-insensitively. Characters without an entry in the
    reading table are copied through unchanged; previously the whole string was
    upper-cased first, which also altered unmapped characters (for example
    "é" came back as "É").

    Args:
        text: The string to rewrite.

    Returns:
        The string with each mapped letter replaced by its reading.
    """
    return "".join(_ALPHABET_READINGS.get(ch.upper(), ch) for ch in text)
|
|
|
import re |
|
import cutlet |
|
|
|
# Three-letter sequences and their palatalised replacements; this is the default
# mapping of replace_chars_2.
roma_mapper_plus_2 = dict([
    ("bjo", "bʲo"),
    ("rjo", "rʲo"),
    ("kjo", "kʲo"),
    ("kyu", "kʲu"),
])
|
|
|
def replace_repeated_chars(input_string):
    """Collapse each pair of identical vowels into the vowel plus "ː".

    The string is scanned left to right and pairs do not overlap, so three
    identical vowels give one lengthened vowel followed by a plain one
    ("aaa" -> "aːa"). Only the letters a, i, u, e and o are affected.

    Args:
        input_string: The string to rewrite.

    Returns:
        The rewritten string.
    """
    # ([aiueo])\1 matches one of the five vowels immediately followed by itself.
    return re.sub(r'([aiueo])\1', r'\1ː', input_string)
|
|
|
|
|
def replace_chars_2(text, mapping=roma_mapper_plus_2):
    """Replace every occurrence of a ``mapping`` key in ``text`` in one pass.

    The keys are joined into a single regular-expression alternation, longest
    key first, so a longer key is preferred over a shorter one that is its
    prefix. Each match is swapped for its mapped value.

    Args:
        text: The string to rewrite.
        mapping: Mapping of substring -> replacement. Defaults to
            ``roma_mapper_plus_2``.

    Returns:
        The rewritten string.
    """
    longest_first = sorted(mapping, key=len, reverse=True)
    alternation = '|'.join(map(re.escape, longest_first))
    return re.sub(
        alternation,
        lambda found: mapping.get(found.group(0), found.group(0)),
        text,
    )
|
|
|
|
|
# Characters that are never treated as a doubled consonant: vowels, digits and
# punctuation. The original literal repeated many ASCII symbols; each repeat is
# written here as the full-width form so both widths are covered. The backslash
# is escaped explicitly (the old "\`" was an invalid escape sequence).
_TASHDID_EXEMPT = 'aiueoɯ0123456789.?!_。؟?!…@@##$$%%^^&&**()()_+=[「」]></\\`~~―ー∺"'


def replace_tashdid_2(s):
    """Mark doubled consonants in ``s`` with a leading "ʔ".

    Scanning left to right, a character that is immediately repeated
    ("tt"), or repeated after exactly one space ("t t"), is replaced by "ʔ"
    followed by the second occurrence. The comparison ignores case. Vowels,
    digits, the listed punctuation and whitespace are never treated as doubled
    consonants, so runs such as "aa", "11" or two spaces are left alone.

    Args:
        s: The string to rewrite.

    Returns:
        The rewritten string.
    """
    result = []
    length = len(s)
    i = 0
    while i < length:
        current = s[i].lower()
        # Whitespace is excluded as well: a run of spaces is not a geminate.
        can_double = current not in _TASHDID_EXEMPT and not current.isspace()

        if (can_double and i < length - 2 and s[i + 1] == ' '
                and current == s[i + 2].lower()):
            # "x x": drop the first copy and the space, keep the second copy.
            result.append('ʔ')
            result.append(s[i + 2])
            i += 3
        elif can_double and i < length - 1 and current == s[i + 1].lower():
            # "xx": drop the first copy, keep the second.
            result.append('ʔ')
            result.append(s[i + 1])
            i += 2
        else:
            result.append(s[i])
            i += 1

    return ''.join(result)
|
|
|
def replace_tashdid(input_string):
    """Replace each doubled non-vowel character with "ʔ" plus that character.

    The string is scanned left to right and pairs do not overlap
    ("ttt" -> "ʔtt"). Any character other than a, i, u, e and o counts as a
    non-vowel here, including spaces, digits and punctuation.

    Args:
        input_string: The string to rewrite.

    Returns:
        The rewritten string.
    """
    # ([^aiueo])\1 matches any single non-vowel character followed by itself.
    return re.sub(r'([^aiueo])\1', r'ʔ\1', input_string)
|
|
|
def hira2ipa(text, roma_mapper=roma_mapper):
    """Turn every "n" that is not followed by a known key into "ɴ".

    An "n" is rewritten when it is the last character of ``text`` or when the
    single character after it is not itself a key of ``roma_mapper``. All other
    characters are copied unchanged.

    Args:
        text: The string to rewrite.
        roma_mapper: Mapping whose keys decide which following characters keep
            the "n" as it is. Defaults to the module-level ``roma_mapper``.

    Returns:
        The rewritten string.
    """
    known_keys = set(roma_mapper.keys())
    plain_n, moraic_n = "n", "ɴ"
    last_index = len(text) - 1

    pieces = []
    for position, char in enumerate(text):
        if char == plain_n and (position == last_index
                                or text[position + 1] not in known_keys):
            pieces.append(moraic_n)
        else:
            pieces.append(char)
    return ''.join(pieces)
|
|
|
import re |
|
|
|
|
|
# Hiragana -> IPA replacement table used by post_fix, which applies the entries
# in insertion order with str.replace. Two-kana combinations therefore have to
# stay ahead of the single-kana entries further down.
#
# Changes against the previous table:
#   * "ぞぉ" now gives "zoː" (was "zeː", a copy of the "ぜぇ" line).
#   * "くゃ" now gives "kʲa" (was "ka"; the rest of the row is palatalised).
#   * The ぶ row repeated "ふゃ"/"ふょ"; those slots now hold "ぶゃ"/"ぶょ".
#   * Keys that were listed several times appear once, at the position of their
#     first occurrence and with the value that the last occurrence assigned.
k_mapper = {
    # --- ゔ combinations ---
    "ゔぁ": "ba", "ゔぃ": "bi", "ゔぇ": "be", "ゔぉ": "bo",
    "ゔゃ": "bʲa", "ゔゅ": "bʲɯ", "ゔょ": "bʲo",
    "ゔ": "bɯ",

    # --- kana + small vowel / small ya-yu-yo ---
    "あぁ": " aː", "いぃ": " iː", "いぇ": " je", "いゃ": " ja",
    "うぅ": " ɯː", "えぇ": " eː", "おぉ": " oː",
    "かぁ": " kaː", "きぃ": " kiː", "くぅ": "kɯː",
    "くゃ": "kʲa", "くゅ": "kʲɯ", "くょ": "kʲo",
    "けぇ": "keː", "こぉ": "koː",
    "がぁ": "gaː", "ぎぃ": "giː", "ぐぅ": "gɯː",
    "ぐゃ": "gʲa", "ぐゅ": "gʲɯ", "ぐょ": "gʲo",
    "げぇ": "geː", "ごぉ": "goː",
    "さぁ": "saː", "しぃ": "ɕiː", "すぅ": "sɯː",
    "すゃ": "sʲa", "すゅ": "sʲɯ", "すょ": "sʲo",
    "せぇ": "seː", "そぉ": "soː",
    "ざぁ": "zaː", "じぃ": "dʑiː",
    # "ずぅ" was listed twice ("zɯː", later "zɯ"); the later value was in effect.
    "ずぅ": "zɯ",
    "ずゃ": "zʲa", "ずゅ": "zʲɯ", "ずょ": "zʲo",
    "ぜぇ": "zeː", "ぞぉ": "zoː",
    "たぁ": "taː", "ちぃ": "tɕiː",
    "つぁ": "tsa", "つぃ": "tsi", "つぅ": "tsɯː",
    "つゃ": "tɕa", "つゅ": "tɕɯ", "つょ": "tɕo",
    "つぇ": "tse", "つぉ": "tso",
    "てぇ": "teː", "とぉ": "toː",
    "だぁ": "daː", "ぢぃ": "dʑiː", "づぅ": "dɯː",
    "づゃ": "zʲa", "づゅ": "zʲɯ", "づょ": "zʲo",
    "でぇ": "deː", "どぉ": "doː",
    "なぁ": "naː", "にぃ": "niː", "ぬぅ": "nɯː",
    "ぬゃ": "nʲa", "ぬゅ": "nʲɯ", "ぬょ": "nʲo",
    "ねぇ": "neː", "のぉ": "noː",
    "はぁ": "haː", "ひぃ": "çiː",
    # "ふぅ" was listed twice ("ɸɯː", later "ɸɯ"); the later value was in effect.
    "ふぅ": "ɸɯ",
    "ふゃ": "ɸʲa", "ふゅ": "ɸʲɯ", "ふょ": "ɸʲo",
    "へぇ": "heː", "ほぉ": "hoː",
    "ばぁ": "baː", "びぃ": "biː", "ぶぅ": "bɯː",
    "ぶゃ": "bʲa", "ぶゅ": "bʲɯ", "ぶょ": "bʲo",
    "べぇ": "beː", "ぼぉ": "boː",
    "ぱぁ": "paː", "ぴぃ": "piː", "ぷぅ": "pɯː",
    "ぷゃ": "pʲa", "ぷゅ": "pʲɯ", "ぷょ": "pʲo",
    "ぺぇ": "peː", "ぽぉ": "poː",
    "まぁ": "maː", "みぃ": "miː", "むぅ": "mɯː",
    "むゃ": "mʲa", "むゅ": "mʲɯ", "むょ": "mʲo",
    "めぇ": "meː", "もぉ": "moː",
    "やぁ": "jaː", "ゆぅ": "jɯː",
    "ゆゃ": "jaː", "ゆゅ": "jɯː", "ゆょ": "joː",
    "よぉ": "joː",
    "らぁ": "ɽaː", "りぃ": "ɽiː", "るぅ": "ɽɯː",
    "るゃ": "ɽʲa", "るゅ": "ɽʲɯ", "るょ": "ɽʲo",
    "れぇ": "ɽeː", "ろぉ": "ɽoː",
    "わぁ": "ɯaː", "をぉ": "oː",

    # --- further combinations ---
    "う゛": "bɯ",
    "でぃ": "di", "でゃ": "dʲa", "でゅ": "dʲɯ", "でょ": "dʲo",
    "てぃ": "ti", "てゃ": "tʲa", "てゅ": "tʲɯ", "てょ": "tʲo",
    "すぃ": "si",
    "ずぁ": "zɯa", "ずぃ": "zi", "ずぇ": "ze", "ずぉ": "zo",
    "きゃ": "kʲa", "きゅ": "kʲɯ", "きょ": "kʲo",
    "しゃ": "ɕʲa", "しゅ": "ɕʲɯ", "しぇ": "ɕʲe", "しょ": "ɕʲo",
    "ちゃ": "tɕa", "ちゅ": "tɕɯ", "ちぇ": "tɕe", "ちょ": "tɕo",
    "とぅ": "tɯ", "とゃ": "tʲa", "とゅ": "tʲɯ", "とょ": "tʲo",
    "どぁ": "doa", "どぅ": "dɯ",
    "どゃ": "dʲa", "どゅ": "dʲɯ", "どょ": "dʲo",
    "にゃ": "nʲa", "にゅ": "nʲɯ", "にょ": "nʲo",
    "ひゃ": "çʲa", "ひゅ": "çʲɯ", "ひょ": "çʲo",
    "みゃ": "mʲa", "みゅ": "mʲɯ", "みょ": "mʲo",
    "りゃ": "ɽʲa", "りぇ": "ɽʲe", "りゅ": "ɽʲɯ", "りょ": "ɽʲo",
    "ぎゃ": "gʲa", "ぎゅ": "gʲɯ", "ぎょ": "gʲo",
    "ぢぇ": "dʑe", "ぢゃ": "dʑa", "ぢゅ": "dʑɯ", "ぢょ": "dʑo",
    "じぇ": "dʑe", "じゃ": "dʑa", "じゅ": "dʑɯ", "じょ": "dʑo",
    "びゃ": "bʲa", "びゅ": "bʲɯ", "びょ": "bʲo",
    "ぴゃ": "pʲa", "ぴゅ": "pʲɯ", "ぴょ": "pʲo",
    "うぁ": "ɯa", "うぃ": "ɯi", "うぇ": "ɯe", "うぉ": "ɯo",
    "うゃ": "ɯʲa", "うゅ": "ɯʲɯ", "うょ": "ɯʲo",
    "ふぁ": "ɸa", "ふぃ": "ɸi", "ふぇ": "ɸe", "ふぉ": "ɸo",

    # --- single kana ---
    "あ": " a", "い": " i", "う": "ɯ", "え": " e", "お": " o",
    "か": " ka", "き": " ki", "く": " kɯ", "け": " ke", "こ": " ko",
    "さ": " sa", "し": " ɕi", "す": " sɯ", "せ": " se", "そ": " so",
    "た": " ta", "ち": " tɕi", "つ": " tsɯ", "て": " te", "と": " to",
    "な": " na", "に": " ni", "ぬ": " nɯ", "ね": " ne", "の": " no",
    "は": " ha", "ひ": " çi", "ふ": " ɸɯ", "へ": " he", "ほ": " ho",
    "ま": " ma", "み": " mi", "む": " mɯ", "め": " me", "も": " mo",
    "ら": " ɽa", "り": " ɽi", "る": " ɽɯ", "れ": " ɽe", "ろ": " ɽo",
    "が": " ga", "ぎ": " gi", "ぐ": " gɯ", "げ": " ge", "ご": " go",
    "ざ": " za", "じ": " dʑi", "ず": " zɯ", "ぜ": " ze", "ぞ": " zo",
    "だ": " da", "ぢ": " dʑi", "づ": " zɯ", "で": " de", "ど": " do",
    "ば": " ba", "び": " bi", "ぶ": " bɯ", "べ": " be", "ぼ": " bo",
    "ぱ": " pa", "ぴ": " pi", "ぷ": " pɯ", "ぺ": " pe", "ぽ": " po",
    "や": " ja", "ゆ": " jɯ", "よ": " jo",
    "わ": " wa", "ゐ": " i", "ゑ": " e",
    "ん": " ɴ",
    # NOTE(review): "っ" was listed twice (" ʔ", later "?"); the later value was
    # the one in effect and is kept. A glottal stop looks more plausible than a
    # question mark here — confirm which was intended.
    "っ": "?",
    "ー": " ː",

    # --- small kana on their own ---
    "ぁ": " a", "ぃ": " i", "ぅ": " ɯ", "ぇ": " e", "ぉ": " o",
    "ゎ": " ɯa",

    "を": "o",
}
|
|
|
|
|
def post_fix(text):
    """Replace the hiragana in ``text`` using the ``k_mapper`` table.

    Every key of ``k_mapper`` is replaced by its value with ``str.replace``,
    one key at a time in the table's insertion order.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for kana, ipa in k_mapper.items():
        text = text.replace(kana, ipa)
    return text
|
|
|
|
|
|
|
|
|
sym_ws = dict([ |
|
|
|
("$ ","dorɯ"), |
|
("$ ","dorɯ"), |
|
|
|
("〇 ","marɯ"), |
|
("¥ ","eɴ"), |
|
|
|
("# ","haʔɕɯ tagɯ"), |
|
("# ","haʔɕɯ tagɯ"), |
|
|
|
("& ","ando"), |
|
("& ","ando"), |
|
|
|
("% ","paːsento"), |
|
("% ","paːsento"), |
|
|
|
("@ ","aʔto saiɴ"), |
|
("@ ","aʔto saiɴ") |
|
|
|
|
|
|
|
]) |
|
|
|
def random_sym_fix(text):
    """Spell out the symbol-plus-space sequences listed in ``sym_ws``.

    Each key of ``sym_ws`` is replaced by its value surrounded by one space on
    either side, using ``str.replace`` in the table's insertion order.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for symbol, spoken in sym_ws.items():
        text = text.replace(symbol, f" {spoken} ")
    return text
|
|
|
|
|
sym_ns = dict([ |
|
|
|
("$","dorɯ"), |
|
("$","dorɯ"), |
|
|
|
("〇","marɯ"), |
|
("¥","eɴ"), |
|
|
|
("#","haʔɕɯ tagɯ"), |
|
("#","haʔɕɯ tagɯ"), |
|
|
|
("&","ando"), |
|
("&","ando"), |
|
|
|
("%","paːsento"), |
|
("%","paːsento"), |
|
|
|
("@","aʔto saiɴ"), |
|
("@","aʔto saiɴ"), |
|
|
|
("~","—"), |
|
("kʲɯɯdʑɯɯkʲɯɯ.kʲɯɯdʑɯɯ","kʲɯɯdʑɯɯ kʲɯɯ teɴ kʲɯɯdʑɯɯ") |
|
|
|
|
|
|
|
|
|
|
|
]) |
|
|
|
def random_sym_fix_no_space(text):
    """Spell out the bare symbols listed in ``sym_ns``.

    Each key of ``sym_ns`` is replaced by its value surrounded by one space on
    either side, using ``str.replace`` in the table's insertion order.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for symbol, spoken in sym_ns.items():
        text = text.replace(symbol, f" {spoken} ")
    return text
|
|
|
|
|
# Ordered table of literal substring fix-ups applied by random_space_fix with
# str.replace. The order of the entries is part of the behaviour (later keys see
# the output of earlier ones), so it is kept exactly as it was.
# NOTE(review): some entries cannot have any effect as written — e.g. " " -> " "
# and " sonna " -> " sonna " are identities, and "- " / "--" come after "-" has
# already been rewritten — confirm against the intended source text.
spaces = {
    "ɯ ɴ": "ɯɴ",
    "na ɴ ": "naɴ ",
    " mina ": " miɴna ",
    "ko ɴ ni tɕi ha": "konnitɕiwa",
    "ha i": "hai",
    "boɯtɕama": "boʔtɕama",
    "i eːi": "ieːi",
    "taiɕɯtsɯdʑoɯ": "taiɕitsɯdʑoɯ",
    "soɴna ka ze ni": "soɴna fɯɯ ni",
    " i e ": "ke ",
    "�": "",
    "×": " batsɯ ",
    "se ka ɯndo": "sekaɯndo",
    "i i": "iː",
    "i tɕi": "itɕi",
    "ka i": "kai",
    "naɴ ga": "nani ga",
    "i eː i": "ieːi",

    "naɴ koɽe": "nani koɽe",
    "naɴ soɽe": "nani soɽe",
    " ɕeɴ ": " seɴ ",

    # Word-final "n" before a space.
    "en ": "eɴ ",
    "in ": "iɴ ",
    "an ": "aɴ ",
    "on ": "oɴ ",
    "ɯn ": "ɯɴ ",

    "koɴd o": "kondo",
    "ko ɴ d o": "kondo",
    "ko ɴ do": "kondo",

    "oanitɕaɴ": "oniːtɕaɴ",
    "oanisaɴ": "oniːsaɴ",
    "oanisama": "oniːsama",
    "hoːmɯrɯɴɯ": "hoːmɯrɯːmɯ",
    "so ɴ na ": "sonna",
    " sonna ": " sonna ",
    " konna ": " konna ",
    "ko ɴ na ": "konna",
    " ko to ": " koto ",
    "edʑdʑi": "eʔtɕi",
    " edʑdʑ ": " eʔtɕi ",
    " dʑdʑ ": " dʑiːdʑiː ",
    "secɯnd": "sekaɯndo",

    # "ɴ" directly before a vowel or "ʲ" becomes "n".
    "ɴɯ": "nɯ",
    "ɴe": "ne",
    "ɴo": "no",
    "ɴa": "na",
    "ɴi": "ni",
    "ɴʲ": "nʲ",

    "hotond o": "hotondo",
    "hakoɴd e": "hakoɴde",
    "gakɯtɕi ɽi": "gaʔtɕiɽi ",

    " ʔ": "ʔ",
    "ʔ ": "ʔ",

    "-": "ː",
    "- ": "ː",
    "--": "~ː",
    "~": "—",
    "、": ",",

    " ː": "ː",
    "ka nade": "kanade",

    "ohahasaɴ": "okaːsaɴ",
    " ": " ",
    "viː": "bɯiː",
    "ːː": "ː—",

    # Rejoin a "d" that was split from what follows it.
    "d ʑ": "dʑ",
    "d a": "da",
    "d e": "de",
    "d o": "do",
    "d ɯ": "dɯ",

    "niːɕiki": "ni iɕiki",
    "anitɕaɴ": "niːtɕaɴ",
    "daiːtɕi": "dai itɕi",
    "niːta": "ni ita",
    "niːrɯ": "ni irɯ",
    "a—": "aː",
    "waːis": "wa ais",
    "waːiɕ": "wa aiɕ",
    "aːt": "a at",
    "waːʔ": "wa aʔ",

    "naɴ sono": "nani sono",
    "naɴ kono": "nani kono",
    "naɴ ano": "nani ano",
    " niːtaɽa": " ni itaɽa",
    "doɽamaɕiːd": "doɽama ɕiːdʲi",
    "aɴ ta": "anta",
    "aɴta": "anta",
    "naniːʔteɴ": "nani iʔteɴ",
    "niːkite": "ni ikite",
    "niːʔ": "ni iʔ",
    "niːɯ": "ni iɯ",
    "niːw": "ni iw",
    "niːkɯ": "ni ikɯ",
    "de—": "de e",
    "aːj": "aː aj",
    "aːɽ": "a aɽ",
    "aːr": "a ar",
    "ɕiːk ": "ɕi ik",
    "ɕijoː neɴ": "ɕoɯneɴ",
}
|
|
|
|
|
|
|
def random_space_fix(text):
    """Apply every fix-up in the ``spaces`` table to ``text``.

    Each key of ``spaces`` is replaced by its value with ``str.replace``, one
    key at a time in the table's insertion order, so later keys are matched
    against the output of the earlier replacements.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for pattern, replacement in spaces.items():
        text = text.replace(pattern, replacement)
    return text