Spaces:
Build error
Build error
File size: 3,349 Bytes
db3dea6 e076ae8 5ab552b e076ae8 5ab552b e076ae8 db3dea6 e076ae8 db3dea6 e076ae8 db3dea6 e076ae8 db3dea6 e076ae8 db3dea6 e076ae8 5ab552b e076ae8 5ab552b e076ae8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from TTS.tts.models.vits import Vits
from TTS.tts.configs.vits_config import VitsConfig
import numpy as np
import unicodedata
import regex
# Matches a run of digits, "." and "," that ends on a digit; the whole run is
# captured as group 1 so callers can pad it with spaces or test a token with
# fullmatch().
num_re = regex.compile(r"([0-9.,]*[0-9])")
# Vietnamese words for the digits 0-9, indexed by digit value.
digits = ["không", "một", "hai", "ba", "bốn", "năm", "sáu", "bảy", "tám", "chín"]


def _read_two_digits(num: str) -> str:
    """Read a two-character digit string such as "07", "10", "15" or "21"."""
    tens, ones = int(num[0]), int(num[1])
    if tens == 0:
        # "05" is not fifteen: a leading zero is read digit by digit.
        return digits[0] + " " + digits[ones]
    prefix = "mười" if tens == 1 else digits[tens] + " mươi"
    if ones == 0:
        return prefix
    if ones == 5:
        last = "lăm"
    elif ones == 1 and tens > 1:
        # "mốt" replaces "một" only after "mươi" (21, 31, ...), not in 11.
        last = "mốt"
    else:
        last = digits[ones]
    return prefix + " " + last


def _read_dotted(num: str) -> str:
    """Read a number that uses "." as thousands separator (two or three groups)."""
    groups = num.split(".")
    if not all(groups):
        # A leading, trailing or doubled "." leaves an empty group: not a number.
        return num
    if len(groups) == 2:
        head, tail = groups
        thousands = read_number(head) + " ngàn"
        if tail == "000":
            return thousands
        # Only a real three-digit group can take the short "lẻ <digit>" form;
        # checking the length avoids indexing past a shorter or longer group.
        if len(tail) == 3 and tail.startswith("00"):
            return thousands + " lẻ " + read_number(tail[2])
        return thousands + " " + read_number(tail)
    if len(groups) == 3:
        millions, thousands, units = groups
        words = read_number(millions) + " triệu"
        # Trailing all-zero groups are silent: "1.000.000" is just "một triệu".
        if thousands != "000" or units != "000":
            words += " " + read_number(thousands) + " ngàn"
            if units != "000":
                words += " " + read_number(units)
        return words
    return num


def read_number(num: str) -> str:
    """Translate numeric text into written (Vietnamese) form.

    Supported shapes:
      * plain digit strings of 1 to 6 digits;
      * "." as thousands separator with two or three groups ("12.500",
        "1.250.000");
      * a single "," as decimal separator ("3,5"), read as "... phẩy ...".

    Any other input (longer plain numbers, several commas, empty groups,
    non-numeric text) is returned unchanged instead of raising.

    Args:
        num (str): numeric text
    Returns:
        (str): written form of num, or num itself when the shape is unsupported
    """
    # isdecimal() accepts exactly the characters int() can parse.
    if num.isdecimal():
        size = len(num)
        value = int(num)
        if size == 1:
            return digits[value]
        if size == 2:
            return _read_two_digits(num)
        if size == 3:
            hundreds = digits[value // 100] + " trăm"
            if value % 100 == 0:
                return hundreds
            if num[1] == "0":
                return hundreds + " lẻ " + digits[value % 100]
            return hundreds + " " + _read_two_digits(num[1:])
        if size <= 6:
            thousands = read_number(str(value // 1000)) + " ngàn"
            rest = num[-3:]
            # Same reading as the dotted form: "1000" and "1.000" are both
            # "một ngàn", without a trailing "không trăm".
            if rest == "000":
                return thousands
            return thousands + " " + read_number(rest)
        return num
    if "," in num:
        whole, _, fraction = num.partition(",")
        # A decimal number has exactly one comma with text on both sides.
        if not whole or not fraction or "," in fraction:
            return num
        return read_number(whole) + " phẩy " + read_number(fraction)
    if "." in num:
        return _read_dotted(num)
    return num
def load_model(
    config_path: str = "vits/config.json",
    model_path: str = "vits/coqui_vits.onnx",
):
    """Build the VITS model from its JSON config and load the ONNX weights.

    Args:
        config_path (str): JSON file passed to ``VitsConfig.load_json``.
        model_path (str): ONNX file passed to ``Vits.load_onnx``.
    Returns:
        The ``Vits`` instance, after one inference call on a short sample
        sentence has been run.
    """
    config = VitsConfig()
    config.load_json(config_path)
    vits = Vits.init_from_config(config)
    vits.load_onnx(model_path)

    # Run one inference on a fixed sample sentence and discard the output.
    # NOTE(review): presumably a warm-up so the first real request does not
    # pay the ONNX session start-up cost - confirm.
    sample_text = "xin chào tôi là hoàng đây"
    text_inputs = np.asarray(
        vits.tokenizer.text_to_ids(sample_text),
        dtype=np.int64,
    )[None, :]  # add a leading axis of size 1 in front of the token ids
    vits.inference_onnx(text_inputs)
    return vits
def normalize_text(text: str) -> str:
    """Normalize the input text.

    Steps: lowercase, Unicode NFKC normalization, punctuation removal, then
    every numeric token is replaced by its written form via read_number.

    "." and "," are kept when they sit between two digits, so thousands
    separators ("5.000") and decimal commas ("3,5") reach read_number intact
    instead of being merged into a different number ("3,5" -> "35").

    Args:
        text (str): the input text
    Returns:
        text (str): the normalized text
    """
    # lowercase
    text = text.lower()
    # unicode normalize
    text = unicodedata.normalize("NFKC", text)

    # Drop "." and "," unless a digit sits on both sides of the character.
    text = regex.sub(r"(?<![0-9])[.,]|[.,](?![0-9])", "", text)
    # Remaining punctuation is removed in one pass; ")" is stripped together
    # with "(" so a closing bracket is not left behind in the output.
    text = text.translate(str.maketrans("", "", ";:!?()"))

    # Convert numeric text into written form
    text = num_re.sub(r" \1 ", text)
    # Shapes read_number is written for: plain digits, or 1-3 digits followed
    # by one or two ".ddd" groups, each optionally followed by ",digits".
    well_formed = r"(?:[0-9]{1,3}(?:\.[0-9]{3}){1,2}|[0-9]+)(?:,[0-9]+)?"
    words = []
    for word in text.split():
        if num_re.fullmatch(word):
            if not regex.fullmatch(well_formed, word):
                # Anything else ("1,2,3", "3.5", "20.10.2020") is read as
                # before: separators are stripped and the bare digits are read.
                word = word.replace(".", "").replace(",", "")
            word = read_number(word)
        words.append(word)
    return " ".join(words)
|