Xenova (HF staff) committed
Commit 64c2f1f (1 parent: baf3828)

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "_name_or_path": "ckiplab/bert-base-chinese-pos",
+ "architectures": [
+ "BertForTokenClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "directionality": "bidi",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "A",
+ "1": "Caa",
+ "2": "Cab",
+ "3": "Cba",
+ "4": "Cbb",
+ "5": "D",
+ "6": "Da",
+ "7": "Dfa",
+ "8": "Dfb",
+ "9": "Di",
+ "10": "Dk",
+ "11": "DM",
+ "12": "I",
+ "13": "Na",
+ "14": "Nb",
+ "15": "Nc",
+ "16": "Ncd",
+ "17": "Nd",
+ "18": "Nep",
+ "19": "Neqa",
+ "20": "Neqb",
+ "21": "Nes",
+ "22": "Neu",
+ "23": "Nf",
+ "24": "Ng",
+ "25": "Nh",
+ "26": "Nv",
+ "27": "P",
+ "28": "T",
+ "29": "VA",
+ "30": "VAC",
+ "31": "VB",
+ "32": "VC",
+ "33": "VCL",
+ "34": "VD",
+ "35": "VF",
+ "36": "VE",
+ "37": "VG",
+ "38": "VH",
+ "39": "VHC",
+ "40": "VI",
+ "41": "VJ",
+ "42": "VK",
+ "43": "VL",
+ "44": "V_2",
+ "45": "DE",
+ "46": "SHI",
+ "47": "FW",
+ "48": "COLONCATEGORY",
+ "49": "COMMACATEGORY",
+ "50": "DASHCATEGORY",
+ "51": "DOTCATEGORY",
+ "52": "ETCCATEGORY",
+ "53": "EXCLAMATIONCATEGORY",
+ "54": "PARENTHESISCATEGORY",
+ "55": "PAUSECATEGORY",
+ "56": "PERIODCATEGORY",
+ "57": "QUESTIONCATEGORY",
+ "58": "SEMICOLONCATEGORY",
+ "59": "SPCHANGECATEGORY"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "A": 0,
+ "COLONCATEGORY": 48,
+ "COMMACATEGORY": 49,
+ "Caa": 1,
+ "Cab": 2,
+ "Cba": 3,
+ "Cbb": 4,
+ "D": 5,
+ "DASHCATEGORY": 50,
+ "DE": 45,
+ "DM": 11,
+ "DOTCATEGORY": 51,
+ "Da": 6,
+ "Dfa": 7,
+ "Dfb": 8,
+ "Di": 9,
+ "Dk": 10,
+ "ETCCATEGORY": 52,
+ "EXCLAMATIONCATEGORY": 53,
+ "FW": 47,
+ "I": 12,
+ "Na": 13,
+ "Nb": 14,
+ "Nc": 15,
+ "Ncd": 16,
+ "Nd": 17,
+ "Nep": 18,
+ "Neqa": 19,
+ "Neqb": 20,
+ "Nes": 21,
+ "Neu": 22,
+ "Nf": 23,
+ "Ng": 24,
+ "Nh": 25,
+ "Nv": 26,
+ "P": 27,
+ "PARENTHESISCATEGORY": 54,
+ "PAUSECATEGORY": 55,
+ "PERIODCATEGORY": 56,
+ "QUESTIONCATEGORY": 57,
+ "SEMICOLONCATEGORY": 58,
+ "SHI": 46,
+ "SPCHANGECATEGORY": 59,
+ "T": 28,
+ "VA": 29,
+ "VAC": 30,
+ "VB": 31,
+ "VC": 32,
+ "VCL": 33,
+ "VD": 34,
+ "VE": 36,
+ "VF": 35,
+ "VG": 37,
+ "VH": 38,
+ "VHC": 39,
+ "VI": 40,
+ "VJ": 41,
+ "VK": 42,
+ "VL": 43,
+ "V_2": 44
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "position_embedding_type": "absolute",
+ "tokenizer_class": "BertTokenizerFast",
+ "transformers_version": "4.28.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 21128
+ }
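
The config above is the stock ckiplab/bert-base-chinese-pos BERT encoder with a 60-way token-classification head; id2label maps each output index to a CKIP POS tag (Na, VC, the punctuation *CATEGORY tags, and so on). A minimal sketch of running it with Transformers.js follows; the repo id "Xenova/bert-base-chinese-pos" is an assumption inferred from "_name_or_path" and this upload's author, so substitute the actual repo id if it differs.

```ts
import { pipeline } from '@xenova/transformers';

// "token-classification" matches the BertForTokenClassification
// architecture declared in config.json; each predicted class index is
// translated back to a CKIP POS tag via id2label.
const tagger = await pipeline(
  'token-classification',
  'Xenova/bert-base-chinese-pos', // assumed repo id
);

// Tag a short Chinese sentence; the result is one { word, entity, score }
// entry per token, with entity drawn from the 60 tags above.
const result = await tagger('我喜歡學習新語言');
console.log(result);
```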
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c7ce5cef6f795cc9415ee1f1a19e9b81fc2eaa076145993b764cc178fba31d6
+ size 407158988
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4e6a40e33b05fcf863fd84513717990683920ae601c100739259aa9365b2b7f7
+ size 102533883
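
Both ONNX exports are stored as Git LFS pointers: onnx/model.onnx is the full-precision graph (~407 MB) and onnx/model_quantized.onnx is the quantized variant (~102 MB, roughly a quarter the size, consistent with 8-bit weights). A sketch of choosing between them in Transformers.js v2, again under the repo-id assumption above:

```ts
import { pipeline } from '@xenova/transformers';

// In @xenova/transformers v2, quantized: true is the default and fetches
// onnx/model_quantized.onnx; quantized: false fetches onnx/model.onnx.
const fp32Tagger = await pipeline(
  'token-classification',
  'Xenova/bert-base-chinese-pos', // assumed repo id
  { quantized: false },           // trade download size for full precision
);
```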
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
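
The tokenizer is a standard Chinese BERT WordPiece setup: no lowercasing, Chinese characters pre-split into individual tokens (tokenize_chinese_chars: true), and a 512-token limit matching max_position_embeddings. A sketch of loading it on its own, under the same repo-id assumption:

```ts
import { AutoTokenizer } from '@xenova/transformers';

const tokenizer = await AutoTokenizer.from_pretrained(
  'Xenova/bert-base-chinese-pos', // assumed repo id
);

// Encoding is synchronous once the tokenizer is loaded; input_ids includes
// the [CLS] and [SEP] special tokens defined in special_tokens_map.json.
const { input_ids } = tokenizer('我喜歡學習新語言');
console.log(input_ids);
```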
vocab.txt ADDED
The diff for this file is too large to render. See raw diff