KoichiYasuoka committed on
Commit
454b346
·
1 Parent(s): 76a01f7

version 1.5.4

Browse files
setup.py CHANGED
@@ -5,7 +5,7 @@ URL="https://github.com/KoichiYasuoka/SuPar-Kanbun"
5
 
6
  setuptools.setup(
7
  name="suparkanbun",
8
- version="1.4.6",
9
  description="Tokenizer POS-tagger and Dependency-parser for Classical Chinese",
10
  long_description=long_description,
11
  long_description_content_type="text/markdown",
@@ -19,7 +19,7 @@ setuptools.setup(
19
  "supar>=1.1.4",
20
  "transformers>=4.0.1",
21
  "spacy>=2.2.2",
22
- "deplacy>=2.0.3"
23
  ],
24
  python_requires=">=3.7",
25
  package_data={"suparkanbun":["models/*.txt","models/*/*.txt","models/*/*.json"]},
 
5
 
6
  setuptools.setup(
7
  name="suparkanbun",
8
+ version="1.5.4",
9
  description="Tokenizer POS-tagger and Dependency-parser for Classical Chinese",
10
  long_description=long_description,
11
  long_description_content_type="text/markdown",
 
19
  "supar>=1.1.4",
20
  "transformers>=4.0.1",
21
  "spacy>=2.2.2",
22
+ "deplacy>=2.1.0"
23
  ],
24
  python_requires=">=3.7",
25
  package_data={"suparkanbun":["models/*.txt","models/*/*.txt","models/*/*.json"]},
suparkanbun/simplify.py CHANGED
@@ -1,13 +1,6 @@
1
  #! /usr/bin/python3 -i
2
  # coding=utf-8
3
  simplify={
4
- "𧦧":"𫍟",
5
- "𨬟":"𰾵",
6
- "𡑍":"𫭼",
7
- "𥳁":"𥲤",
8
- "𠕋":"册",
9
- "𡐨":"壄",
10
- "𣙜":"榷",
11
  "㑳":"㑇",
12
  "㑹":"会",
13
  "㘚":"㘎",
@@ -1599,6 +1592,7 @@ simplify={
1599
  "艷":"艳",
1600
  "芻":"刍",
1601
  "茍":"苟",
 
1602
  "茲":"兹",
1603
  "荅":"答",
1604
  "荊":"荆",
@@ -3238,4 +3232,11 @@ simplify={
3238
  "逸":"逸",
3239
  "難":"难",
3240
  "頻":"频",
 
 
 
 
 
 
 
3241
  }
 
1
  #! /usr/bin/python3 -i
2
  # coding=utf-8
3
  simplify={
 
 
 
 
 
 
 
4
  "㑳":"㑇",
5
  "㑹":"会",
6
  "㘚":"㘎",
 
1592
  "艷":"艳",
1593
  "芻":"刍",
1594
  "茍":"苟",
1595
+ "茘":"荔",
1596
  "茲":"兹",
1597
  "荅":"答",
1598
  "荊":"荆",
 
3232
  "逸":"逸",
3233
  "難":"难",
3234
  "頻":"频",
3235
+ "𠕋":"册",
3236
+ "𡐨":"壄",
3237
+ "𡑍":"𫭼",
3238
+ "𣙜":"榷",
3239
+ "𥳁":"𥲤",
3240
+ "𧦧":"𫍟",
3241
+ "𨬟":"𰾵",
3242
  }
suparkanbun/suparkanbun.py CHANGED
@@ -143,7 +143,8 @@ class SuParKanbunTokenizer(object):
143
  heads.append(0)
144
  deps.append(r)
145
  else:
146
- heads.append(t["head"]-i-1)
 
147
  deps.append(vs.add(t["deprel"]))
148
  spaces.append(False)
149
  g=self.gloss(form,xpos)
 
143
  heads.append(0)
144
  deps.append(r)
145
  else:
146
+ h=t["head"]-i-1
147
+ heads.append(2**64+h if h<0 else h)
148
  deps.append(vs.add(t["deprel"]))
149
  spaces.append(False)
150
  g=self.gloss(form,xpos)
suparkanbun/tradify.py CHANGED
@@ -1,41 +1,6 @@
1
  #! /usr/bin/python3 -i
2
  # coding=utf-8
3
  tradify={
4
- "𫍟":"𧦧",
5
- "𠕋":"冊",
6
- "𫮃":"墠",
7
- "𪪼":"彃",
8
- "𢭏":"擣",
9
- "𥐟":"礒",
10
- "𬘝":"紾",
11
- "𫄨":"絺",
12
- "𮉪":"緅",
13
- "𰬸":"繐",
14
- "𫄸":"纁",
15
- "𮉡":"纑",
16
- "𫄥":"纚",
17
- "𦰏":"蓧",
18
- "𫉁":"薆",
19
- "𧦧":"訑",
20
- "𫍥":"誂",
21
- "𬤊":"諟",
22
- "𬤣":"譈",
23
- "𫐄":"軏",
24
- "𫐐":"輗",
25
- "𬨎":"輶",
26
- "𫓧":"鈇",
27
- "𨱂":"鈋",
28
- "𬱙":"頖",
29
- "𫖹":"顣",
30
- "𫗞":"飦",
31
- "𫗦":"餔",
32
- "𮩝":"餲",
33
- "𮩞":"饐",
34
- "𫗴":"饘",
35
- "𬶍":"鮀",
36
- "𫚈":"鱮",
37
- "𫛞":"鴃",
38
- "𫜁":"鷩",
39
  "㑹":"會",
40
  "㧛":"攬",
41
  "㧞":"拔",
@@ -798,6 +763,7 @@ tradify={
798
  "苍":"蒼",
799
  "苏":"蘇",
800
  "茕":"煢",
 
801
  "茧":"繭",
802
  "荅":"答",
803
  "荆":"荊",
@@ -1281,4 +1247,39 @@ tradify={
1281
  "飯":"飯",
1282
  "館":"館",
1283
  "既":"既",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1284
  }
 
1
  #! /usr/bin/python3 -i
2
  # coding=utf-8
3
  tradify={
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "㑹":"會",
5
  "㧛":"攬",
6
  "㧞":"拔",
 
763
  "苍":"蒼",
764
  "苏":"蘇",
765
  "茕":"煢",
766
+ "茘":"荔",
767
  "茧":"繭",
768
  "荅":"答",
769
  "荆":"荊",
 
1247
  "飯":"飯",
1248
  "館":"館",
1249
  "既":"既",
1250
+ "𠕋":"冊",
1251
+ "𢭏":"擣",
1252
+ "𥐟":"礒",
1253
+ "𦰏":"蓧",
1254
+ "𧦧":"訑",
1255
+ "𨱂":"鈋",
1256
+ "𪪼":"彃",
1257
+ "𫄥":"纚",
1258
+ "𫄨":"絺",
1259
+ "𫄸":"纁",
1260
+ "𫉁":"薆",
1261
+ "𫍟":"𧦧",
1262
+ "𫍥":"誂",
1263
+ "𫐄":"軏",
1264
+ "𫐐":"輗",
1265
+ "𫓧":"鈇",
1266
+ "𫖹":"顣",
1267
+ "𫗞":"飦",
1268
+ "𫗦":"餔",
1269
+ "𫗴":"饘",
1270
+ "𫚈":"鱮",
1271
+ "𫛞":"鴃",
1272
+ "𫜁":"鷩",
1273
+ "𫮃":"墠",
1274
+ "𬘝":"紾",
1275
+ "𬤊":"諟",
1276
+ "𬤣":"譈",
1277
+ "𬨎":"輶",
1278
+ "𬱙":"頖",
1279
+ "𬶍":"鮀",
1280
+ "𮉡":"纑",
1281
+ "𮉪":"緅",
1282
+ "𮩝":"餲",
1283
+ "𮩞":"饐",
1284
+ "𰬸":"繐",
1285
  }