Delete tools
- tools/__init__.py +0 -3
- tools/classify_language.py +0 -197
- tools/log.py +0 -16
- tools/sentence.py +0 -173
- tools/translate.py +0 -61
tools/__init__.py
DELETED
@@ -1,3 +0,0 @@
-"""
-Toolkit package
-"""
tools/classify_language.py
DELETED
@@ -1,197 +0,0 @@
-import regex as re
-
-try:
-    from config import config
-
-    LANGUAGE_IDENTIFICATION_LIBRARY = (
-        config.webui_config.language_identification_library
-    )
-except:
-    LANGUAGE_IDENTIFICATION_LIBRARY = "langid"
-
-module = LANGUAGE_IDENTIFICATION_LIBRARY.lower()
-
-langid_languages = [
-    "af",
-    "am",
-    "an",
-    "ar",
-    "as",
-    "az",
-    "be",
-    "bg",
-    "bn",
-    "br",
-    "bs",
-    "ca",
-    "cs",
-    "cy",
-    "da",
-    "de",
-    "dz",
-    "el",
-    "en",
-    "eo",
-    "es",
-    "et",
-    "eu",
-    "fa",
-    "fi",
-    "fo",
-    "fr",
-    "ga",
-    "gl",
-    "gu",
-    "he",
-    "hi",
-    "hr",
-    "ht",
-    "hu",
-    "hy",
-    "id",
-    "is",
-    "it",
-    "ja",
-    "jv",
-    "ka",
-    "kk",
-    "km",
-    "kn",
-    "ko",
-    "ku",
-    "ky",
-    "la",
-    "lb",
-    "lo",
-    "lt",
-    "lv",
-    "mg",
-    "mk",
-    "ml",
-    "mn",
-    "mr",
-    "ms",
-    "mt",
-    "nb",
-    "ne",
-    "nl",
-    "nn",
-    "no",
-    "oc",
-    "or",
-    "pa",
-    "pl",
-    "ps",
-    "pt",
-    "qu",
-    "ro",
-    "ru",
-    "rw",
-    "se",
-    "si",
-    "sk",
-    "sl",
-    "sq",
-    "sr",
-    "sv",
-    "sw",
-    "ta",
-    "te",
-    "th",
-    "tl",
-    "tr",
-    "ug",
-    "uk",
-    "ur",
-    "vi",
-    "vo",
-    "wa",
-    "xh",
-    "zh",
-    "zu",
-]
-
-
-def classify_language(text: str, target_languages: list = None) -> str:
-    if module == "fastlid" or module == "fasttext":
-        from fastlid import fastlid, supported_langs
-
-        classifier = fastlid
-        if target_languages != None:
-            target_languages = [
-                lang for lang in target_languages if lang in supported_langs
-            ]
-            fastlid.set_languages = target_languages
-    elif module == "langid":
-        import langid
-
-        classifier = langid.classify
-        if target_languages != None:
-            target_languages = [
-                lang for lang in target_languages if lang in langid_languages
-            ]
-            langid.set_languages(target_languages)
-    else:
-        raise ValueError(f"Wrong module {module}")
-
-    lang = classifier(text)[0]
-
-    return lang
-
-
-def classify_zh_ja(text: str) -> str:
-    for idx, char in enumerate(text):
-        unicode_val = ord(char)
-
-        # Detect Japanese (kana) characters
-        if 0x3040 <= unicode_val <= 0x309F or 0x30A0 <= unicode_val <= 0x30FF:
-            return "ja"
-
-        # Detect Han (CJK ideograph) characters
-        if 0x4E00 <= unicode_val <= 0x9FFF:
-            # Check the surrounding character
-            next_char = text[idx + 1] if idx + 1 < len(text) else None
-
-            if next_char and (
-                0x3040 <= ord(next_char) <= 0x309F or 0x30A0 <= ord(next_char) <= 0x30FF
-            ):
-                return "ja"
-
-    return "zh"
-
-
-def split_alpha_nonalpha(text, mode=1):
-    if mode == 1:
-        pattern = r"(?<=[\u4e00-\u9fff\u3040-\u30FF\d\s])(?=[\p{Latin}])|(?<=[\p{Latin}\s])(?=[\u4e00-\u9fff\u3040-\u30FF\d])"
-    elif mode == 2:
-        pattern = r"(?<=[\u4e00-\u9fff\u3040-\u30FF\s])(?=[\p{Latin}\d])|(?<=[\p{Latin}\d\s])(?=[\u4e00-\u9fff\u3040-\u30FF])"
-    else:
-        raise ValueError("Invalid mode. Supported modes are 1 and 2.")
-
-    return re.split(pattern, text)
-
-
-if __name__ == "__main__":
-    text = "这是一个测试文本"
-    print(classify_language(text))
-    print(classify_zh_ja(text))  # "zh"
-
-    text = "これはテストテキストです"
-    print(classify_language(text))
-    print(classify_zh_ja(text))  # "ja"
-
-    text = "vits和Bert-VITS2是tts模型。花费3days.花费3天。Take 3 days"
-
-    print(split_alpha_nonalpha(text, mode=1))
-    # output: ['vits', '和', 'Bert-VITS', '2是', 'tts', '模型。花费3', 'days.花费3天。Take 3 days']
-
-    print(split_alpha_nonalpha(text, mode=2))
-    # output: ['vits', '和', 'Bert-VITS2', '是', 'tts', '模型。花费', '3days.花费', '3', '天。Take 3 days']
-
-    text = "vits 和 Bert-VITS2 是 tts 模型。花费3days.花费3天。Take 3 days"
-    print(split_alpha_nonalpha(text, mode=1))
-    # output: ['vits ', '和 ', 'Bert-VITS', '2 ', '是 ', 'tts ', '模型。花费3', 'days.花费3天。Take ', '3 ', 'days']
-
-    text = "vits 和 Bert-VITS2 是 tts 模型。花费3days.花费3天。Take 3 days"
-    print(split_alpha_nonalpha(text, mode=2))
-    # output: ['vits ', '和 ', 'Bert-VITS2 ', '是 ', 'tts ', '模型。花费', '3days.花费', '3', '天。Take ', '3 ', 'days']
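
Note: the deleted classify_zh_ja above disambiguates CJK-only strings by scanning for kana code points and otherwise treating Han characters as Chinese. A minimal standalone sketch of that idea, simplified from the deleted function (it omits the next-character check), not code from this repository:

    def looks_japanese(text: str) -> bool:
        # Hiragana (U+3040-309F) or katakana (U+30A0-30FF) marks the text as Japanese.
        return any(
            0x3040 <= ord(ch) <= 0x309F or 0x30A0 <= ord(ch) <= 0x30FF
            for ch in text
        )

    print(looks_japanese("これはテストテキストです"))  # True
    print(looks_japanese("这是一个测试文本"))          # False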
tools/log.py
DELETED
@@ -1,16 +0,0 @@
-"""
-Logger wrapper
-"""
-from loguru import logger
-import sys
-
-
-# Remove all of the default handlers
-logger.remove()
-
-# Custom format, added to standard output
-log_format = (
-    "<g>{time:MM-DD HH:mm:ss}</g> |<lvl>{level:^8}</lvl>| {file}:{line} | {message}"
-)
-
-logger.add(sys.stdout, format=log_format, backtrace=True, diagnose=True)
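
The module above configures loguru once at import time; the assumption is that other modules imported the shared logger from it. A minimal usage sketch under that assumption:

    from tools.log import logger  # assumes tools/log.py is still on the import path

    logger.info("model loaded")               # rendered with the custom format above
    logger.warning("falling back to langid")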
tools/sentence.py
DELETED
@@ -1,173 +0,0 @@
-import logging
-
-import regex as re
-
-from tools.classify_language import classify_language, split_alpha_nonalpha
-
-
-def check_is_none(item) -> bool:
-    """none -> True, not none -> False"""
-    return (
-        item is None
-        or (isinstance(item, str) and str(item).isspace())
-        or str(item) == ""
-    )
-
-
-def markup_language(text: str, target_languages: list = None) -> str:
-    pattern = (
-        r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
-        r"\!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
-        r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
-    )
-    sentences = re.split(pattern, text)
-
-    pre_lang = ""
-    p = 0
-
-    if target_languages is not None:
-        sorted_target_languages = sorted(target_languages)
-        if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
-            new_sentences = []
-            for sentence in sentences:
-                new_sentences.extend(split_alpha_nonalpha(sentence))
-            sentences = new_sentences
-
-    for sentence in sentences:
-        if check_is_none(sentence):
-            continue
-
-        lang = classify_language(sentence, target_languages)
-
-        if pre_lang == "":
-            text = text[:p] + text[p:].replace(
-                sentence, f"[{lang.upper()}]{sentence}", 1
-            )
-            p += len(f"[{lang.upper()}]")
-        elif pre_lang != lang:
-            text = text[:p] + text[p:].replace(
-                sentence, f"[{pre_lang.upper()}][{lang.upper()}]{sentence}", 1
-            )
-            p += len(f"[{pre_lang.upper()}][{lang.upper()}]")
-        pre_lang = lang
-        p += text[p:].index(sentence) + len(sentence)
-    text += f"[{pre_lang.upper()}]"
-
-    return text
-
-
-def split_by_language(text: str, target_languages: list = None) -> list:
-    pattern = (
-        r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
-        r"\!?\。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
-        r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
-    )
-    sentences = re.split(pattern, text)
-
-    pre_lang = ""
-    start = 0
-    end = 0
-    sentences_list = []
-
-    if target_languages is not None:
-        sorted_target_languages = sorted(target_languages)
-        if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
-            new_sentences = []
-            for sentence in sentences:
-                new_sentences.extend(split_alpha_nonalpha(sentence))
-            sentences = new_sentences
-
-    for sentence in sentences:
-        if check_is_none(sentence):
-            continue
-
-        lang = classify_language(sentence, target_languages)
-
-        end += text[end:].index(sentence)
-        if pre_lang != "" and pre_lang != lang:
-            sentences_list.append((text[start:end], pre_lang))
-            start = end
-        end += len(sentence)
-        pre_lang = lang
-    sentences_list.append((text[start:], pre_lang))
-
-    return sentences_list
-
-
-def sentence_split(text: str, max: int) -> list:
-    pattern = r"[!(),—+\-.:;??。,、;:]+"
-    sentences = re.split(pattern, text)
-    discarded_chars = re.findall(pattern, text)
-
-    sentences_list, count, p = [], 0, 0
-
-    # Iterate over the separators that were split off
-    for i, discarded_chars in enumerate(discarded_chars):
-        count += len(sentences[i]) + len(discarded_chars)
-        if count >= max:
-            sentences_list.append(text[p : p + count].strip())
-            p += count
-            count = 0
-
-    # Append whatever text remains
-    if p < len(text):
-        sentences_list.append(text[p:])
-
-    return sentences_list
-
-
-def sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None):
-    # If this speaker only supports a single language
-    if speaker_lang is not None and len(speaker_lang) == 1:
-        if lang.upper() not in ["AUTO", "MIX"] and lang.lower() != speaker_lang[0]:
-            logging.debug(
-                f'lang "{lang}" is not in speaker_lang {speaker_lang},automatically set lang={speaker_lang[0]}'
-            )
-            lang = speaker_lang[0]
-
-    sentences_list = []
-    if lang.upper() != "MIX":
-        if max <= 0:
-            sentences_list.append(
-                markup_language(text, speaker_lang)
-                if lang.upper() == "AUTO"
-                else f"[{lang.upper()}]{text}[{lang.upper()}]"
-            )
-        else:
-            for i in sentence_split(text, max):
-                if check_is_none(i):
-                    continue
-                sentences_list.append(
-                    markup_language(i, speaker_lang)
-                    if lang.upper() == "AUTO"
-                    else f"[{lang.upper()}]{i}[{lang.upper()}]"
-                )
-    else:
-        sentences_list.append(text)
-
-    for i in sentences_list:
-        logging.debug(i)
-
-    return sentences_list
-
-
-if __name__ == "__main__":
-    text = "这几天心里颇不宁静。今晚在院子里坐着乘凉,忽然想起日日走过的荷塘,在这满月的光里,总该另有一番样子吧。月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;妻在屋里拍着闰儿,迷迷糊糊地哼着眠歌。我悄悄地披了大衫,带上门出去。"
-    print(markup_language(text, target_languages=None))
-    print(sentence_split(text, max=50))
-    print(sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None))
-
-    text = "你好,这是一段用来测试自动标注的文本。こんにちは,これは自動ラベリングのテスト用テキストです.Hello, this is a piece of text to test autotagging.你好!今天我们要介绍VITS项目,其重点是使用了GAN Duration predictor和transformer flow,并且接入了Bert模型来提升韵律。Bert embedding会在稍后介绍。"
-    print(split_by_language(text, ["zh", "ja", "en"]))
-
-    text = "vits和Bert-VITS2是tts模型。花费3days.花费3天。Take 3 days"
-
-    print(split_by_language(text, ["zh", "ja", "en"]))
-    # output: [('vits', 'en'), ('和', 'ja'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]
-
-    print(split_by_language(text, ["zh", "en"]))
-    # output: [('vits', 'en'), ('和', 'zh'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]
-
-    text = "vits 和 Bert-VITS2 是 tts 模型。花费 3 days. 花费 3天。Take 3 days"
-    print(split_by_language(text, ["zh", "en"]))
-    # output: [('vits ', 'en'), ('和 ', 'zh'), ('Bert-VITS2 ', 'en'), ('是 ', 'zh'), ('tts ', 'en'), ('模型。花费 ', 'zh'), ('3 days. ', 'en'), ('花费 3天。', 'zh'), ('Take 3 days', 'en')]
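
As the output comments above show, split_by_language returns (chunk, lang) pairs. A hypothetical consumer, only to illustrate the shape of the result (synthesize and the per-language handling are placeholders, not functions from this repository):

    from tools.sentence import split_by_language  # assumes the module is still importable

    def synthesize(text: str) -> None:
        # Route each detected language run to whatever per-language front end is in use.
        for chunk, lang in split_by_language(text, ["zh", "ja", "en"]):
            print(f"[{lang.upper()}] {chunk}")

    synthesize("你好,hello,こんにちは")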
tools/translate.py
DELETED
@@ -1,61 +0,0 @@
-"""
-Translation API
-"""
-from config import config
-
-import random
-import hashlib
-import requests
-
-
-def translate(Sentence: str, to_Language: str = "jp", from_Language: str = ""):
-    """
-    :param Sentence: the sentence to translate
-    :param from_Language: language of the sentence to translate
-    :param to_Language: target language
-    :return: the translated sentence, or None on error
-
-    Common language codes: Chinese zh, English en, Japanese jp
-    """
-    appid = config.translate_config.app_key
-    key = config.translate_config.secret_key
-    if appid == "" or key == "":
-        return "请开发者在config.yml中配置app_key与secret_key"
-    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
-    texts = Sentence.splitlines()
-    outTexts = []
-    for t in texts:
-        if t != "":
-            # Sign calculation; see the docs at https://api.fanyi.baidu.com/product/113
-            salt = str(random.randint(1, 100000))
-            signString = appid + t + salt + key
-            hs = hashlib.md5()
-            hs.update(signString.encode("utf-8"))
-            signString = hs.hexdigest()
-            if from_Language == "":
-                from_Language = "auto"
-            headers = {"Content-Type": "application/x-www-form-urlencoded"}
-            payload = {
-                "q": t,
-                "from": from_Language,
-                "to": to_Language,
-                "appid": appid,
-                "salt": salt,
-                "sign": signString,
-            }
-            # Send the request
-            try:
-                response = requests.post(
-                    url=url, data=payload, headers=headers, timeout=3
-                )
-                response = response.json()
-                if "trans_result" in response.keys():
-                    result = response["trans_result"][0]
-                    if "dst" in result.keys():
-                        dst = result["dst"]
-                        outTexts.append(dst)
-            except Exception:
-                return Sentence
-        else:
-            outTexts.append(t)
-    return "\n".join(outTexts)
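
The request signing above follows the Baidu Fanyi scheme: sign = MD5(appid + query + salt + secret_key), hex-encoded. A minimal sketch of just that step, with placeholder credentials:

    import hashlib
    import random

    appid, secret_key = "YOUR_APPID", "YOUR_SECRET_KEY"  # placeholders, not real credentials
    query = "こんにちは"
    salt = str(random.randint(1, 100000))
    sign = hashlib.md5((appid + query + salt + secret_key).encode("utf-8")).hexdigest()
    print(sign)  # value sent as the "sign" field of the POST payload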