Commit
·
454b346
1
Parent(s):
76a01f7
version 1.5.4
Browse files
- setup.py +2 -2
- suparkanbun/simplify.py +8 -7
- suparkanbun/suparkanbun.py +2 -1
- suparkanbun/tradify.py +36 -35
setup.py
CHANGED
@@ -5,7 +5,7 @@ URL="https://github.com/KoichiYasuoka/SuPar-Kanbun"
|
|
5 |
|
6 |
setuptools.setup(
|
7 |
name="suparkanbun",
|
8 |
-
version="1.4
|
9 |
description="Tokenizer POS-tagger and Dependency-parser for Classical Chinese",
|
10 |
long_description=long_description,
|
11 |
long_description_content_type="text/markdown",
|
@@ -19,7 +19,7 @@ setuptools.setup(
|
|
19 |
"supar>=1.1.4",
|
20 |
"transformers>=4.0.1",
|
21 |
"spacy>=2.2.2",
|
22 |
-
"deplacy>=2.0
|
23 |
],
|
24 |
python_requires=">=3.7",
|
25 |
package_data={"suparkanbun":["models/*.txt","models/*/*.txt","models/*/*.json"]},
|
|
|
5 |
|
6 |
setuptools.setup(
|
7 |
name="suparkanbun",
|
8 |
+
version="1.5.4",
|
9 |
description="Tokenizer POS-tagger and Dependency-parser for Classical Chinese",
|
10 |
long_description=long_description,
|
11 |
long_description_content_type="text/markdown",
|
|
|
19 |
"supar>=1.1.4",
|
20 |
"transformers>=4.0.1",
|
21 |
"spacy>=2.2.2",
|
22 |
+
"deplacy>=2.1.0"
|
23 |
],
|
24 |
python_requires=">=3.7",
|
25 |
package_data={"suparkanbun":["models/*.txt","models/*/*.txt","models/*/*.json"]},
|
suparkanbun/simplify.py
CHANGED
@@ -1,13 +1,6 @@
|
|
1 |
#! /usr/bin/python3 -i
|
2 |
# coding=utf-8
|
3 |
simplify={
|
4 |
-
"𧦧":"𫍟",
|
5 |
-
"𨬟":"𰾵",
|
6 |
-
"𡑍":"𫭼",
|
7 |
-
"𥳁":"𥲤",
|
8 |
-
"𠕋":"册",
|
9 |
-
"𡐨":"壄",
|
10 |
-
"𣙜":"榷",
|
11 |
"㑳":"㑇",
|
12 |
"㑹":"会",
|
13 |
"㘚":"㘎",
|
@@ -1599,6 +1592,7 @@ simplify={
|
|
1599 |
"艷":"艳",
|
1600 |
"芻":"刍",
|
1601 |
"茍":"苟",
|
|
|
1602 |
"茲":"兹",
|
1603 |
"荅":"答",
|
1604 |
"荊":"荆",
|
@@ -3238,4 +3232,11 @@ simplify={
|
|
3238 |
"逸":"逸",
|
3239 |
"難":"难",
|
3240 |
"頻":"频",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3241 |
}
|
|
|
1 |
#! /usr/bin/python3 -i
|
2 |
# coding=utf-8
|
3 |
simplify={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"㑳":"㑇",
|
5 |
"㑹":"会",
|
6 |
"㘚":"㘎",
|
|
|
1592 |
"艷":"艳",
|
1593 |
"芻":"刍",
|
1594 |
"茍":"苟",
|
1595 |
+
"茘":"荔",
|
1596 |
"茲":"兹",
|
1597 |
"荅":"答",
|
1598 |
"荊":"荆",
|
|
|
3232 |
"逸":"逸",
|
3233 |
"難":"难",
|
3234 |
"頻":"频",
|
3235 |
+
"𠕋":"册",
|
3236 |
+
"𡐨":"壄",
|
3237 |
+
"𡑍":"𫭼",
|
3238 |
+
"𣙜":"榷",
|
3239 |
+
"𥳁":"𥲤",
|
3240 |
+
"𧦧":"𫍟",
|
3241 |
+
"𨬟":"𰾵",
|
3242 |
}
|
suparkanbun/suparkanbun.py
CHANGED
@@ -143,7 +143,8 @@ class SuParKanbunTokenizer(object):
|
|
143 |
heads.append(0)
|
144 |
deps.append(r)
|
145 |
else:
|
146 |
-
|
|
|
147 |
deps.append(vs.add(t["deprel"]))
|
148 |
spaces.append(False)
|
149 |
g=self.gloss(form,xpos)
|
|
|
143 |
heads.append(0)
|
144 |
deps.append(r)
|
145 |
else:
|
146 |
+
h=t["head"]-i-1
|
147 |
+
heads.append(2**64+h if h<0 else h)
|
148 |
deps.append(vs.add(t["deprel"]))
|
149 |
spaces.append(False)
|
150 |
g=self.gloss(form,xpos)
|
suparkanbun/tradify.py
CHANGED
@@ -1,41 +1,6 @@
|
|
1 |
#! /usr/bin/python3 -i
|
2 |
# coding=utf-8
|
3 |
tradify={
|
4 |
-
"𫍟":"𧦧",
|
5 |
-
"𠕋":"冊",
|
6 |
-
"𫮃":"墠",
|
7 |
-
"𪪼":"彃",
|
8 |
-
"𢭏":"擣",
|
9 |
-
"𥐟":"礒",
|
10 |
-
"𬘝":"紾",
|
11 |
-
"𫄨":"絺",
|
12 |
-
"𮉪":"緅",
|
13 |
-
"𰬸":"繐",
|
14 |
-
"𫄸":"纁",
|
15 |
-
"𮉡":"纑",
|
16 |
-
"𫄥":"纚",
|
17 |
-
"𦰏":"蓧",
|
18 |
-
"𫉁":"薆",
|
19 |
-
"𧦧":"訑",
|
20 |
-
"𫍥":"誂",
|
21 |
-
"𬤊":"諟",
|
22 |
-
"𬤣":"譈",
|
23 |
-
"𫐄":"軏",
|
24 |
-
"𫐐":"輗",
|
25 |
-
"𬨎":"輶",
|
26 |
-
"𫓧":"鈇",
|
27 |
-
"𨱂":"鈋",
|
28 |
-
"𬱙":"頖",
|
29 |
-
"𫖹":"顣",
|
30 |
-
"𫗞":"飦",
|
31 |
-
"𫗦":"餔",
|
32 |
-
"𮩝":"餲",
|
33 |
-
"𮩞":"饐",
|
34 |
-
"𫗴":"饘",
|
35 |
-
"𬶍":"鮀",
|
36 |
-
"𫚈":"鱮",
|
37 |
-
"𫛞":"鴃",
|
38 |
-
"𫜁":"鷩",
|
39 |
"㑹":"會",
|
40 |
"㧛":"攬",
|
41 |
"㧞":"拔",
|
@@ -798,6 +763,7 @@ tradify={
|
|
798 |
"苍":"蒼",
|
799 |
"苏":"蘇",
|
800 |
"茕":"煢",
|
|
|
801 |
"茧":"繭",
|
802 |
"荅":"答",
|
803 |
"荆":"荊",
|
@@ -1281,4 +1247,39 @@ tradify={
|
|
1281 |
"飯":"飯",
|
1282 |
"館":"館",
|
1283 |
"既":"既",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1284 |
}
|
|
|
1 |
#! /usr/bin/python3 -i
|
2 |
# coding=utf-8
|
3 |
tradify={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"㑹":"會",
|
5 |
"㧛":"攬",
|
6 |
"㧞":"拔",
|
|
|
763 |
"苍":"蒼",
|
764 |
"苏":"蘇",
|
765 |
"茕":"煢",
|
766 |
+
"茘":"荔",
|
767 |
"茧":"繭",
|
768 |
"荅":"答",
|
769 |
"荆":"荊",
|
|
|
1247 |
"飯":"飯",
|
1248 |
"館":"館",
|
1249 |
"既":"既",
|
1250 |
+
"𠕋":"冊",
|
1251 |
+
"𢭏":"擣",
|
1252 |
+
"𥐟":"礒",
|
1253 |
+
"𦰏":"蓧",
|
1254 |
+
"𧦧":"訑",
|
1255 |
+
"𨱂":"鈋",
|
1256 |
+
"𪪼":"彃",
|
1257 |
+
"𫄥":"纚",
|
1258 |
+
"𫄨":"絺",
|
1259 |
+
"𫄸":"纁",
|
1260 |
+
"𫉁":"薆",
|
1261 |
+
"𫍟":"𧦧",
|
1262 |
+
"𫍥":"誂",
|
1263 |
+
"𫐄":"軏",
|
1264 |
+
"𫐐":"輗",
|
1265 |
+
"𫓧":"鈇",
|
1266 |
+
"𫖹":"顣",
|
1267 |
+
"𫗞":"飦",
|
1268 |
+
"𫗦":"餔",
|
1269 |
+
"𫗴":"饘",
|
1270 |
+
"𫚈":"鱮",
|
1271 |
+
"𫛞":"鴃",
|
1272 |
+
"𫜁":"鷩",
|
1273 |
+
"𫮃":"墠",
|
1274 |
+
"𬘝":"紾",
|
1275 |
+
"𬤊":"諟",
|
1276 |
+
"𬤣":"譈",
|
1277 |
+
"𬨎":"輶",
|
1278 |
+
"𬱙":"頖",
|
1279 |
+
"𬶍":"鮀",
|
1280 |
+
"𮉡":"纑",
|
1281 |
+
"𮉪":"緅",
|
1282 |
+
"𮩝":"餲",
|
1283 |
+
"𮩞":"饐",
|
1284 |
+
"𰬸":"繐",
|
1285 |
}
|