KoichiYasuoka commited on
Commit
42f798a
1 Parent(s): 315f556

model improved

Browse files
Files changed (3) hide show
  1. config.json +1014 -316
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
config.json CHANGED
@@ -12,322 +12,424 @@
12
  "id2label": {
13
  "0": "ADJ",
14
  "1": "ADP",
15
- "2": "ADP+PRON",
16
- "3": "ADV",
17
- "4": "ADV+AUX",
18
- "5": "ADV+PART",
19
- "6": "AUX",
20
- "7": "AUX+PART",
21
- "8": "B-ADJ",
22
- "9": "B-ADJ+ADJ",
23
- "10": "B-ADJ+NOUN",
24
- "11": "B-ADJ+NOUN+NOUN",
25
- "12": "B-ADJ+PART",
26
- "13": "B-ADJ+PROPN",
27
- "14": "B-ADJ+PUNCT",
28
- "15": "B-ADP",
29
- "16": "B-ADP+NOUN",
30
- "17": "B-ADV",
31
- "18": "B-ADV+AUX",
32
- "19": "B-ADV+PUNCT",
33
- "20": "B-AUX",
34
- "21": "B-AUX+PART",
35
- "22": "B-AUX+PART+VERB",
36
- "23": "B-AUX+VERB",
37
- "24": "B-CCONJ",
38
- "25": "B-DET",
39
- "26": "B-DET+AUX",
40
- "27": "B-DET+NOUN",
41
- "28": "B-INTJ",
42
- "29": "B-INTJ+PUNCT",
43
- "30": "B-NOUN",
44
- "31": "B-NOUN+AUX",
45
- "32": "B-NOUN+NOUN",
46
- "33": "B-NOUN+NOUN+VERB",
47
- "34": "B-NOUN+PART",
48
- "35": "B-NOUN+PUNCT",
49
- "36": "B-NOUN+VERB",
50
- "37": "B-NUM",
51
- "38": "B-PART",
52
- "39": "B-PRON",
53
- "40": "B-PRON+ADJ",
54
- "41": "B-PRON+ADV",
55
- "42": "B-PRON+AUX",
56
- "43": "B-PRON+PART",
57
- "44": "B-PRON+VERB",
58
- "45": "B-PROPN",
59
- "46": "B-PROPN+PART",
60
- "47": "B-PROPN+PROPN",
61
- "48": "B-PROPN+PUNCT",
62
- "49": "B-PUNCT",
63
- "50": "B-PUNCT+PUNCT",
64
- "51": "B-PUNCT+PUNCT+PUNCT",
65
- "52": "B-SCONJ",
66
- "53": "B-SYM",
67
- "54": "B-VERB",
68
- "55": "B-VERB+ADJ",
69
- "56": "B-VERB+ADJ+CCONJ",
70
- "57": "B-VERB+ADP",
71
- "58": "B-VERB+ADV",
72
- "59": "B-VERB+DET",
73
- "60": "B-VERB+NOUN",
74
- "61": "B-VERB+NOUN+NOUN",
75
- "62": "B-VERB+PART",
76
- "63": "B-VERB+PRON",
77
- "64": "B-VERB+SCONJ",
78
- "65": "B-X",
79
- "66": "B-X+PUNCT",
80
- "67": "B-X+X",
81
- "68": "B-X+X+PRON",
82
- "69": "CCONJ",
83
- "70": "DET",
84
- "71": "DET+NUM",
85
- "72": "I-ADJ",
86
- "73": "I-ADJ+ADJ",
87
- "74": "I-ADJ+NOUN",
88
- "75": "I-ADJ+NOUN+NOUN",
89
- "76": "I-ADJ+PART",
90
- "77": "I-ADJ+PROPN",
91
- "78": "I-ADJ+PUNCT",
92
- "79": "I-ADP",
93
- "80": "I-ADP+NOUN",
94
- "81": "I-ADV",
95
- "82": "I-ADV+AUX",
96
- "83": "I-ADV+PUNCT",
97
- "84": "I-AUX",
98
- "85": "I-AUX+PART",
99
- "86": "I-AUX+PART+VERB",
100
- "87": "I-AUX+VERB",
101
- "88": "I-CCONJ",
102
- "89": "I-DET",
103
- "90": "I-DET+AUX",
104
- "91": "I-DET+NOUN",
105
- "92": "I-INTJ",
106
- "93": "I-INTJ+PUNCT",
107
- "94": "I-NOUN",
108
- "95": "I-NOUN+AUX",
109
- "96": "I-NOUN+NOUN",
110
- "97": "I-NOUN+NOUN+VERB",
111
- "98": "I-NOUN+PART",
112
- "99": "I-NOUN+PUNCT",
113
- "100": "I-NOUN+VERB",
114
- "101": "I-NUM",
115
- "102": "I-PART",
116
- "103": "I-PRON",
117
- "104": "I-PRON+ADJ",
118
- "105": "I-PRON+ADV",
119
- "106": "I-PRON+AUX",
120
- "107": "I-PRON+PART",
121
- "108": "I-PRON+VERB",
122
- "109": "I-PROPN",
123
- "110": "I-PROPN+PART",
124
- "111": "I-PROPN+PROPN",
125
- "112": "I-PROPN+PUNCT",
126
- "113": "I-PUNCT",
127
- "114": "I-PUNCT+PUNCT",
128
- "115": "I-PUNCT+PUNCT+PUNCT",
129
- "116": "I-SCONJ",
130
- "117": "I-SYM",
131
- "118": "I-VERB",
132
- "119": "I-VERB+ADJ",
133
- "120": "I-VERB+ADJ+CCONJ",
134
- "121": "I-VERB+ADP",
135
- "122": "I-VERB+ADV",
136
- "123": "I-VERB+DET",
137
- "124": "I-VERB+NOUN",
138
- "125": "I-VERB+NOUN+NOUN",
139
- "126": "I-VERB+PART",
140
- "127": "I-VERB+PRON",
141
- "128": "I-VERB+SCONJ",
142
- "129": "I-X",
143
- "130": "I-X+PUNCT",
144
- "131": "I-X+X",
145
- "132": "I-X+X+PRON",
146
- "133": "INTJ",
147
- "134": "NOUN",
148
- "135": "NOUN+AUX",
149
- "136": "NOUN+PART",
150
- "137": "NUM",
151
- "138": "PART",
152
- "139": "PRON",
153
- "140": "PRON+AUX",
154
- "141": "PRON+VERB",
155
- "142": "PROPN",
156
- "143": "PROPN+PART",
157
- "144": "PUNCT",
158
- "145": "PUNCT+PUNCT",
159
- "146": "PUNCT+PUNCT+PUNCT",
160
- "147": "PUNCT+SYM",
161
- "148": "SCONJ",
162
- "149": "SYM",
163
- "150": "SYM+PUNCT",
164
- "151": "SYM+SYM",
165
- "152": "VERB",
166
- "153": "VERB+ADP",
167
- "154": "VERB+PART",
168
- "155": "VERB+PRON",
169
- "156": "X"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  },
171
  "initializer_range": 0.02,
172
  "intermediate_size": 3072,
173
  "label2id": {
174
  "ADJ": 0,
175
  "ADP": 1,
176
- "ADP+PRON": 2,
177
- "ADV": 3,
178
- "ADV+AUX": 4,
179
- "ADV+PART": 5,
180
- "AUX": 6,
181
- "AUX+PART": 7,
182
- "B-ADJ": 8,
183
- "B-ADJ+ADJ": 9,
184
- "B-ADJ+NOUN": 10,
185
- "B-ADJ+NOUN+NOUN": 11,
186
- "B-ADJ+PART": 12,
187
- "B-ADJ+PROPN": 13,
188
- "B-ADJ+PUNCT": 14,
189
- "B-ADP": 15,
190
- "B-ADP+NOUN": 16,
191
- "B-ADV": 17,
192
- "B-ADV+AUX": 18,
193
- "B-ADV+PUNCT": 19,
194
- "B-AUX": 20,
195
- "B-AUX+PART": 21,
196
- "B-AUX+PART+VERB": 22,
197
- "B-AUX+VERB": 23,
198
- "B-CCONJ": 24,
199
- "B-DET": 25,
200
- "B-DET+AUX": 26,
201
- "B-DET+NOUN": 27,
202
- "B-INTJ": 28,
203
- "B-INTJ+PUNCT": 29,
204
- "B-NOUN": 30,
205
- "B-NOUN+AUX": 31,
206
- "B-NOUN+NOUN": 32,
207
- "B-NOUN+NOUN+VERB": 33,
208
- "B-NOUN+PART": 34,
209
- "B-NOUN+PUNCT": 35,
210
- "B-NOUN+VERB": 36,
211
- "B-NUM": 37,
212
- "B-PART": 38,
213
- "B-PRON": 39,
214
- "B-PRON+ADJ": 40,
215
- "B-PRON+ADV": 41,
216
- "B-PRON+AUX": 42,
217
- "B-PRON+PART": 43,
218
- "B-PRON+VERB": 44,
219
- "B-PROPN": 45,
220
- "B-PROPN+PART": 46,
221
- "B-PROPN+PROPN": 47,
222
- "B-PROPN+PUNCT": 48,
223
- "B-PUNCT": 49,
224
- "B-PUNCT+PUNCT": 50,
225
- "B-PUNCT+PUNCT+PUNCT": 51,
226
- "B-SCONJ": 52,
227
- "B-SYM": 53,
228
- "B-VERB": 54,
229
- "B-VERB+ADJ": 55,
230
- "B-VERB+ADJ+CCONJ": 56,
231
- "B-VERB+ADP": 57,
232
- "B-VERB+ADV": 58,
233
- "B-VERB+DET": 59,
234
- "B-VERB+NOUN": 60,
235
- "B-VERB+NOUN+NOUN": 61,
236
- "B-VERB+PART": 62,
237
- "B-VERB+PRON": 63,
238
- "B-VERB+SCONJ": 64,
239
- "B-X": 65,
240
- "B-X+PUNCT": 66,
241
- "B-X+X": 67,
242
- "B-X+X+PRON": 68,
243
- "CCONJ": 69,
244
- "DET": 70,
245
- "DET+NUM": 71,
246
- "I-ADJ": 72,
247
- "I-ADJ+ADJ": 73,
248
- "I-ADJ+NOUN": 74,
249
- "I-ADJ+NOUN+NOUN": 75,
250
- "I-ADJ+PART": 76,
251
- "I-ADJ+PROPN": 77,
252
- "I-ADJ+PUNCT": 78,
253
- "I-ADP": 79,
254
- "I-ADP+NOUN": 80,
255
- "I-ADV": 81,
256
- "I-ADV+AUX": 82,
257
- "I-ADV+PUNCT": 83,
258
- "I-AUX": 84,
259
- "I-AUX+PART": 85,
260
- "I-AUX+PART+VERB": 86,
261
- "I-AUX+VERB": 87,
262
- "I-CCONJ": 88,
263
- "I-DET": 89,
264
- "I-DET+AUX": 90,
265
- "I-DET+NOUN": 91,
266
- "I-INTJ": 92,
267
- "I-INTJ+PUNCT": 93,
268
- "I-NOUN": 94,
269
- "I-NOUN+AUX": 95,
270
- "I-NOUN+NOUN": 96,
271
- "I-NOUN+NOUN+VERB": 97,
272
- "I-NOUN+PART": 98,
273
- "I-NOUN+PUNCT": 99,
274
- "I-NOUN+VERB": 100,
275
- "I-NUM": 101,
276
- "I-PART": 102,
277
- "I-PRON": 103,
278
- "I-PRON+ADJ": 104,
279
- "I-PRON+ADV": 105,
280
- "I-PRON+AUX": 106,
281
- "I-PRON+PART": 107,
282
- "I-PRON+VERB": 108,
283
- "I-PROPN": 109,
284
- "I-PROPN+PART": 110,
285
- "I-PROPN+PROPN": 111,
286
- "I-PROPN+PUNCT": 112,
287
- "I-PUNCT": 113,
288
- "I-PUNCT+PUNCT": 114,
289
- "I-PUNCT+PUNCT+PUNCT": 115,
290
- "I-SCONJ": 116,
291
- "I-SYM": 117,
292
- "I-VERB": 118,
293
- "I-VERB+ADJ": 119,
294
- "I-VERB+ADJ+CCONJ": 120,
295
- "I-VERB+ADP": 121,
296
- "I-VERB+ADV": 122,
297
- "I-VERB+DET": 123,
298
- "I-VERB+NOUN": 124,
299
- "I-VERB+NOUN+NOUN": 125,
300
- "I-VERB+PART": 126,
301
- "I-VERB+PRON": 127,
302
- "I-VERB+SCONJ": 128,
303
- "I-X": 129,
304
- "I-X+PUNCT": 130,
305
- "I-X+X": 131,
306
- "I-X+X+PRON": 132,
307
- "INTJ": 133,
308
- "NOUN": 134,
309
- "NOUN+AUX": 135,
310
- "NOUN+PART": 136,
311
- "NUM": 137,
312
- "PART": 138,
313
- "PRON": 139,
314
- "PRON+AUX": 140,
315
- "PRON+VERB": 141,
316
- "PROPN": 142,
317
- "PROPN+PART": 143,
318
- "PUNCT": 144,
319
- "PUNCT+PUNCT": 145,
320
- "PUNCT+PUNCT+PUNCT": 146,
321
- "PUNCT+SYM": 147,
322
- "SCONJ": 148,
323
- "SYM": 149,
324
- "SYM+PUNCT": 150,
325
- "SYM+SYM": 151,
326
- "VERB": 152,
327
- "VERB+ADP": 153,
328
- "VERB+PART": 154,
329
- "VERB+PRON": 155,
330
- "X": 156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  },
332
  "layer_norm_eps": 1e-05,
333
  "max_position_embeddings": 514,
@@ -346,6 +448,14 @@
346
  "interestingsocial": [
347
  "interesting",
348
  "social"
 
 
 
 
 
 
 
 
349
  ]
350
  },
351
  "ADJ+NOUN": {
@@ -353,14 +463,26 @@
353
  "big",
354
  "source"
355
  ],
 
 
 
 
356
  "distractingelements": [
357
  "distracting",
358
  "elements"
359
  ],
 
 
 
 
360
  "gruelingsanctions": [
361
  "grueling",
362
  "sanctions"
363
  ],
 
 
 
 
364
  "longexposures": [
365
  "long",
366
  "exposures"
@@ -369,6 +491,10 @@
369
  "long",
370
  "hair"
371
  ],
 
 
 
 
372
  "ongoingsummaries": [
373
  "ongoing",
374
  "summaries"
@@ -377,9 +503,29 @@
377
  "pre-meeting",
378
  "site"
379
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  "stronghints": [
381
  "strong",
382
  "hints"
 
 
 
 
383
  ]
384
  },
385
  "ADJ+PART": {
@@ -395,6 +541,10 @@
395
  ]
396
  },
397
  "ADJ+PUNCT": {
 
 
 
 
398
  "e.g.:": [
399
  "e.g.",
400
  ":"
@@ -404,6 +554,12 @@
404
  ","
405
  ]
406
  },
 
 
 
 
 
 
407
  "ADP+NOUN": {
408
  "Infact": [
409
  "In",
@@ -424,6 +580,10 @@
424
  "Heres": [
425
  "Here",
426
  "s"
 
 
 
 
427
  ]
428
  },
429
  "ADV+PART": {
@@ -437,6 +597,10 @@
437
  "E.g.",
438
  ","
439
  ],
 
 
 
 
440
  "i.e.,": [
441
  "i.e.",
442
  ","
@@ -446,11 +610,21 @@
446
  ":"
447
  ]
448
  },
 
 
 
 
 
 
449
  "AUX+PART": {
450
  "Aren't": [
451
  "Are",
452
  "n't"
453
  ],
 
 
 
 
454
  "CANT": [
455
  "CA",
456
  "NT"
@@ -463,6 +637,10 @@
463
  "Can",
464
  "not"
465
  ],
 
 
 
 
466
  "DON'T": [
467
  "DO",
468
  "N'T"
@@ -479,10 +657,26 @@
479
  "Do",
480
  "nt"
481
  ],
 
 
 
 
482
  "Haven't": [
483
  "Have",
484
  "n't"
485
  ],
 
 
 
 
 
 
 
 
 
 
 
 
486
  "ain't": [
487
  "ai",
488
  "n't"
@@ -493,15 +687,19 @@
493
  ],
494
  "aren't": [
495
  "are",
496
- "n't"
497
  ],
498
  "arent": [
499
  "are",
500
  "nt"
501
  ],
 
 
 
 
502
  "can't": [
503
- "ca",
504
- "n't"
505
  ],
506
  "cannot": [
507
  "can",
@@ -527,9 +725,13 @@
527
  "does",
528
  "n't"
529
  ],
 
 
 
 
530
  "don't": [
531
  "do",
532
- "n't"
533
  ],
534
  "dont": [
535
  "do",
@@ -543,6 +745,14 @@
543
  "have",
544
  "n't"
545
  ],
 
 
 
 
 
 
 
 
546
  "wasent": [
547
  "wase",
548
  "nt"
@@ -556,8 +766,8 @@
556
  "n\u2019t"
557
  ],
558
  "won't": [
559
- "wo",
560
- "n't"
561
  ],
562
  "wont": [
563
  "wo",
@@ -632,11 +842,39 @@
632
  ]
633
  },
634
  "INTJ+PUNCT": {
 
 
 
 
635
  "ta',": [
636
  "ta'",
637
  ","
638
  ]
639
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  "NOUN+AUX": {
641
  "breathingshould": [
642
  "breathing",
@@ -645,6 +883,10 @@
645
  "doghas": [
646
  "dog",
647
  "has"
 
 
 
 
648
  ]
649
  },
650
  "NOUN+NOUN": {
@@ -652,6 +894,14 @@
652
  "Driving",
653
  "school"
654
  ],
 
 
 
 
 
 
 
 
655
  "counselingservices": [
656
  "counseling",
657
  "services"
@@ -672,6 +922,10 @@
672
  "engineering",
673
  "services"
674
  ],
 
 
 
 
675
  "kingsnake": [
676
  "king",
677
  "snake"
@@ -684,10 +938,30 @@
684
  "lighting",
685
  "showroom"
686
  ],
 
 
 
 
 
 
 
 
687
  "mpgnumber": [
688
  "mpg",
689
  "number"
690
  ],
 
 
 
 
 
 
 
 
 
 
 
 
691
  "testingschedule": [
692
  "testing",
693
  "schedule"
@@ -695,6 +969,18 @@
695
  "towingservices": [
696
  "towing",
697
  "services"
 
 
 
 
 
 
 
 
 
 
 
 
698
  ]
699
  },
700
  "NOUN+NOUN+VERB": {
@@ -757,6 +1043,10 @@
757
  "cat",
758
  "s"
759
  ],
 
 
 
 
760
  "dads": [
761
  "dad",
762
  "s"
@@ -848,9 +1138,17 @@
848
  "workers": [
849
  "worker",
850
  "s"
 
 
 
 
851
  ]
852
  },
853
  "NOUN+PUNCT": {
 
 
 
 
854
  "Fax.(": [
855
  "Fax.",
856
  "("
@@ -876,7 +1174,21 @@
876
  ","
877
  ]
878
  },
 
 
 
 
 
 
879
  "NOUN+VERB": {
 
 
 
 
 
 
 
 
880
  "thingsounded": [
881
  "thing",
882
  "sounded"
@@ -964,12 +1276,24 @@
964
  "r"
965
  ]
966
  },
 
 
 
 
 
 
967
  "PRON+PART": {
968
  "anyones": [
969
  "anyone",
970
  "s"
971
  ]
972
  },
 
 
 
 
 
 
973
  "PRON+VERB": {
974
  "Thats": [
975
  "That",
@@ -1004,6 +1328,22 @@
1004
  "thank"
1005
  ]
1006
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1007
  "PROPN+PART": {
1008
  "BJs": [
1009
  "BJ",
@@ -1069,6 +1409,10 @@
1069
  "B.",
1070
  ","
1071
  ],
 
 
 
 
1072
  "D.C.,": [
1073
  "D.C.",
1074
  ","
@@ -1106,15 +1450,44 @@
1106
  "-"
1107
  ]
1108
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1109
  "PUNCT+PUNCT": {
1110
  "!\"": [
1111
  "!",
1112
  "\""
1113
  ],
 
 
 
 
1114
  "!)": [
1115
  "!",
1116
  ")"
1117
  ],
 
 
 
 
 
 
 
 
1118
  "\"(": [
1119
  "\"",
1120
  "("
@@ -1131,6 +1504,10 @@
1131
  "\"",
1132
  "-"
1133
  ],
 
 
 
 
1134
  "\"...": [
1135
  "\"",
1136
  "..."
@@ -1139,6 +1516,10 @@
1139
  "\"",
1140
  ":"
1141
  ],
 
 
 
 
1142
  "')": [
1143
  "'",
1144
  ")"
@@ -1155,6 +1536,18 @@
1155
  "(",
1156
  "\"\""
1157
  ],
 
 
 
 
 
 
 
 
 
 
 
 
1158
  ")\"": [
1159
  ")",
1160
  "\""
@@ -1163,6 +1556,10 @@
1163
  ")",
1164
  "("
1165
  ],
 
 
 
 
1166
  "),": [
1167
  ")",
1168
  ","
@@ -1207,18 +1604,34 @@
1207
  "-",
1208
  "\""
1209
  ],
 
 
 
 
1210
  ".'": [
1211
  ".",
1212
  "'"
1213
  ],
 
 
 
 
1214
  "...\"": [
1215
  "...",
1216
  "\""
1217
  ],
 
 
 
 
1218
  "?\"": [
1219
  "?",
1220
  "\""
1221
  ],
 
 
 
 
1222
  "?)": [
1223
  "?",
1224
  ")"
@@ -1230,6 +1643,10 @@
1230
  "],": [
1231
  "]",
1232
  ","
 
 
 
 
1233
  ]
1234
  },
1235
  "PUNCT+PUNCT+PUNCT": {
@@ -1247,6 +1664,11 @@
1247
  "?",
1248
  ")",
1249
  ","
 
 
 
 
 
1250
  ]
1251
  },
1252
  "PUNCT+SYM": {
@@ -1255,6 +1677,13 @@
1255
  "$"
1256
  ]
1257
  },
 
 
 
 
 
 
 
1258
  "SYM+PUNCT": {
1259
  "$,": [
1260
  "$",
@@ -1264,6 +1693,10 @@
1264
  "%",
1265
  ")"
1266
  ],
 
 
 
 
1267
  "-'": [
1268
  "-",
1269
  "'"
@@ -1276,6 +1709,10 @@
1276
  ]
1277
  },
1278
  "VERB+ADJ": {
 
 
 
 
1279
  "doingshoddy": [
1280
  "doing",
1281
  "shoddy"
@@ -1284,6 +1721,18 @@
1284
  "facing",
1285
  "serious"
1286
  ],
 
 
 
 
 
 
 
 
 
 
 
 
1287
  "outsourcingspecial": [
1288
  "outsourcing",
1289
  "special"
@@ -1291,6 +1740,14 @@
1291
  "reinforcingsimilar": [
1292
  "reinforcing",
1293
  "similar"
 
 
 
 
 
 
 
 
1294
  ]
1295
  },
1296
  "VERB+ADJ+CCONJ": {
@@ -1321,18 +1778,97 @@
1321
  "investigating",
1322
  "since"
1323
  ],
 
 
 
 
1324
  "setup": [
1325
  "set",
1326
  "up"
1327
  ]
1328
  },
1329
  "VERB+ADV": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1330
  "totalingsomewhere": [
1331
  "totaling",
1332
  "somewhere"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1333
  ]
1334
  },
1335
  "VERB+DET": {
 
 
 
 
 
 
 
 
1336
  "discussingsome": [
1337
  "discussing",
1338
  "some"
@@ -1353,10 +1889,26 @@
1353
  "experiencing",
1354
  "some"
1355
  ],
 
 
 
 
 
 
 
 
1356
  "meetingeach": [
1357
  "meeting",
1358
  "each"
1359
  ],
 
 
 
 
 
 
 
 
1360
  "readingsome": [
1361
  "reading",
1362
  "some"
@@ -1368,13 +1920,37 @@
1368
  "replacingsome": [
1369
  "replacing",
1370
  "some"
 
 
 
 
 
 
 
 
1371
  ]
1372
  },
1373
  "VERB+NOUN": {
 
 
 
 
 
 
 
 
1374
  "doingscissors": [
1375
  "doing",
1376
  "scissors"
1377
  ],
 
 
 
 
 
 
 
 
1378
  "followingsuggestion": [
1379
  "following",
1380
  "suggestion"
@@ -1383,6 +1959,18 @@
1383
  "forming",
1384
  "eggs"
1385
  ],
 
 
 
 
 
 
 
 
 
 
 
 
1386
  "meaningshell": [
1387
  "meaning",
1388
  "shell"
@@ -1391,6 +1979,10 @@
1391
  "playing",
1392
  "sports"
1393
  ],
 
 
 
 
1394
  "producingshrubs": [
1395
  "producing",
1396
  "shrubs"
@@ -1402,9 +1994,25 @@
1402
  "quittingsmoking": [
1403
  "quitting",
1404
  "smoking"
 
 
 
 
 
 
 
 
 
 
 
 
1405
  ]
1406
  },
1407
  "VERB+PART": {
 
 
 
 
1408
  "Gotta": [
1409
  "Got",
1410
  "ta"
@@ -1413,6 +2021,10 @@
1413
  "are",
1414
  "n't"
1415
  ],
 
 
 
 
1416
  "doesn't": [
1417
  "does",
1418
  "n't"
@@ -1421,6 +2033,10 @@
1421
  "do",
1422
  "n't"
1423
  ],
 
 
 
 
1424
  "gonna": [
1425
  "gon",
1426
  "na"
@@ -1429,6 +2045,10 @@
1429
  "got",
1430
  "ta"
1431
  ],
 
 
 
 
1432
  "wana": [
1433
  "wan",
1434
  "a"
@@ -1447,13 +2067,57 @@
1447
  "call",
1448
  "you"
1449
  ],
 
 
 
 
1450
  "doingeverything": [
1451
  "doing",
1452
  "everything"
1453
  ],
 
 
 
 
1454
  "lets": [
1455
  "let",
1456
  "s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1457
  ]
1458
  },
1459
  "VERB+SCONJ": {
@@ -1462,7 +2126,37 @@
1462
  "whether"
1463
  ]
1464
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1465
  "X+PUNCT": {
 
 
 
 
 
 
 
 
1466
  "etc.)": [
1467
  "etc.",
1468
  ")"
@@ -1477,6 +2171,10 @@
1477
  ]
1478
  },
1479
  "X+X": {
 
 
 
 
1480
  ").doc": [
1481
  ")",
1482
  ".doc"
 
12
  "id2label": {
13
  "0": "ADJ",
14
  "1": "ADP",
15
+ "2": "ADP+DET",
16
+ "3": "ADP+PRON",
17
+ "4": "ADV",
18
+ "5": "ADV+AUX",
19
+ "6": "ADV+PART",
20
+ "7": "AUX",
21
+ "8": "AUX+PART",
22
+ "9": "B-ADJ",
23
+ "10": "B-ADJ+ADJ",
24
+ "11": "B-ADJ+NOUN",
25
+ "12": "B-ADJ+NOUN+NOUN",
26
+ "13": "B-ADJ+PART",
27
+ "14": "B-ADJ+PROPN",
28
+ "15": "B-ADJ+PUNCT",
29
+ "16": "B-ADP",
30
+ "17": "B-ADP+ADJ",
31
+ "18": "B-ADP+NOUN",
32
+ "19": "B-ADP+PRON",
33
+ "20": "B-ADV",
34
+ "21": "B-ADV+AUX",
35
+ "22": "B-ADV+PUNCT",
36
+ "23": "B-AUX",
37
+ "24": "B-AUX+ADV",
38
+ "25": "B-AUX+PART",
39
+ "26": "B-AUX+PART+VERB",
40
+ "27": "B-AUX+VERB",
41
+ "28": "B-CCONJ",
42
+ "29": "B-DET",
43
+ "30": "B-DET+AUX",
44
+ "31": "B-DET+NOUN",
45
+ "32": "B-INTJ",
46
+ "33": "B-INTJ+PUNCT",
47
+ "34": "B-NOUN",
48
+ "35": "B-NOUN+ADJ",
49
+ "36": "B-NOUN+ADP",
50
+ "37": "B-NOUN+AUX",
51
+ "38": "B-NOUN+NOUN",
52
+ "39": "B-NOUN+NOUN+VERB",
53
+ "40": "B-NOUN+PART",
54
+ "41": "B-NOUN+PROPN",
55
+ "42": "B-NOUN+PUNCT",
56
+ "43": "B-NOUN+SCONJ",
57
+ "44": "B-NOUN+VERB",
58
+ "45": "B-NUM",
59
+ "46": "B-PART",
60
+ "47": "B-PRON",
61
+ "48": "B-PRON+ADJ",
62
+ "49": "B-PRON+ADV",
63
+ "50": "B-PRON+AUX",
64
+ "51": "B-PRON+NOUN",
65
+ "52": "B-PRON+PART",
66
+ "53": "B-PRON+PRON",
67
+ "54": "B-PRON+VERB",
68
+ "55": "B-PROPN",
69
+ "56": "B-PROPN+ADP",
70
+ "57": "B-PROPN+AUX",
71
+ "58": "B-PROPN+PART",
72
+ "59": "B-PROPN+PROPN",
73
+ "60": "B-PROPN+PUNCT",
74
+ "61": "B-PROPN+PUNCT+PUNCT",
75
+ "62": "B-PROPN+VERB",
76
+ "63": "B-PUNCT",
77
+ "64": "B-PUNCT+PUNCT",
78
+ "65": "B-PUNCT+PUNCT+PUNCT",
79
+ "66": "B-PUNCT+SYM+PUNCT",
80
+ "67": "B-SCONJ",
81
+ "68": "B-SYM",
82
+ "69": "B-VERB",
83
+ "70": "B-VERB+ADJ",
84
+ "71": "B-VERB+ADJ+CCONJ",
85
+ "72": "B-VERB+ADP",
86
+ "73": "B-VERB+ADV",
87
+ "74": "B-VERB+ADV+PUNCT",
88
+ "75": "B-VERB+AUX",
89
+ "76": "B-VERB+CCONJ",
90
+ "77": "B-VERB+DET",
91
+ "78": "B-VERB+NOUN",
92
+ "79": "B-VERB+NOUN+CCONJ",
93
+ "80": "B-VERB+NOUN+NOUN",
94
+ "81": "B-VERB+PART",
95
+ "82": "B-VERB+PRON",
96
+ "83": "B-VERB+PRON+ADP",
97
+ "84": "B-VERB+PRON+ADV",
98
+ "85": "B-VERB+PROPN",
99
+ "86": "B-VERB+SCONJ",
100
+ "87": "B-VERB+VERB",
101
+ "88": "B-VERB+VERB+NOUN",
102
+ "89": "B-X",
103
+ "90": "B-X+PUNCT",
104
+ "91": "B-X+PUNCT+PUNCT",
105
+ "92": "B-X+X",
106
+ "93": "B-X+X+PRON",
107
+ "94": "CCONJ",
108
+ "95": "DET",
109
+ "96": "DET+NUM",
110
+ "97": "I-ADJ",
111
+ "98": "I-ADJ+ADJ",
112
+ "99": "I-ADJ+NOUN",
113
+ "100": "I-ADJ+NOUN+NOUN",
114
+ "101": "I-ADJ+PART",
115
+ "102": "I-ADJ+PROPN",
116
+ "103": "I-ADJ+PUNCT",
117
+ "104": "I-ADP",
118
+ "105": "I-ADP+ADJ",
119
+ "106": "I-ADP+NOUN",
120
+ "107": "I-ADP+PRON",
121
+ "108": "I-ADV",
122
+ "109": "I-ADV+AUX",
123
+ "110": "I-ADV+PUNCT",
124
+ "111": "I-AUX",
125
+ "112": "I-AUX+ADV",
126
+ "113": "I-AUX+PART",
127
+ "114": "I-AUX+PART+VERB",
128
+ "115": "I-AUX+VERB",
129
+ "116": "I-CCONJ",
130
+ "117": "I-DET",
131
+ "118": "I-DET+AUX",
132
+ "119": "I-DET+NOUN",
133
+ "120": "I-INTJ",
134
+ "121": "I-INTJ+PUNCT",
135
+ "122": "I-NOUN",
136
+ "123": "I-NOUN+ADJ",
137
+ "124": "I-NOUN+ADP",
138
+ "125": "I-NOUN+AUX",
139
+ "126": "I-NOUN+NOUN",
140
+ "127": "I-NOUN+NOUN+VERB",
141
+ "128": "I-NOUN+PART",
142
+ "129": "I-NOUN+PROPN",
143
+ "130": "I-NOUN+PUNCT",
144
+ "131": "I-NOUN+SCONJ",
145
+ "132": "I-NOUN+VERB",
146
+ "133": "I-NUM",
147
+ "134": "I-PART",
148
+ "135": "I-PRON",
149
+ "136": "I-PRON+ADJ",
150
+ "137": "I-PRON+ADV",
151
+ "138": "I-PRON+AUX",
152
+ "139": "I-PRON+NOUN",
153
+ "140": "I-PRON+PART",
154
+ "141": "I-PRON+PRON",
155
+ "142": "I-PRON+VERB",
156
+ "143": "I-PROPN",
157
+ "144": "I-PROPN+ADP",
158
+ "145": "I-PROPN+AUX",
159
+ "146": "I-PROPN+PART",
160
+ "147": "I-PROPN+PROPN",
161
+ "148": "I-PROPN+PUNCT",
162
+ "149": "I-PROPN+PUNCT+PUNCT",
163
+ "150": "I-PROPN+VERB",
164
+ "151": "I-PUNCT",
165
+ "152": "I-PUNCT+PUNCT",
166
+ "153": "I-PUNCT+PUNCT+PUNCT",
167
+ "154": "I-PUNCT+SYM+PUNCT",
168
+ "155": "I-SCONJ",
169
+ "156": "I-SYM",
170
+ "157": "I-VERB",
171
+ "158": "I-VERB+ADJ",
172
+ "159": "I-VERB+ADJ+CCONJ",
173
+ "160": "I-VERB+ADP",
174
+ "161": "I-VERB+ADV",
175
+ "162": "I-VERB+ADV+PUNCT",
176
+ "163": "I-VERB+AUX",
177
+ "164": "I-VERB+CCONJ",
178
+ "165": "I-VERB+DET",
179
+ "166": "I-VERB+NOUN",
180
+ "167": "I-VERB+NOUN+CCONJ",
181
+ "168": "I-VERB+NOUN+NOUN",
182
+ "169": "I-VERB+PART",
183
+ "170": "I-VERB+PRON",
184
+ "171": "I-VERB+PRON+ADP",
185
+ "172": "I-VERB+PRON+ADV",
186
+ "173": "I-VERB+PROPN",
187
+ "174": "I-VERB+SCONJ",
188
+ "175": "I-VERB+VERB",
189
+ "176": "I-VERB+VERB+NOUN",
190
+ "177": "I-X",
191
+ "178": "I-X+PUNCT",
192
+ "179": "I-X+PUNCT+PUNCT",
193
+ "180": "I-X+X",
194
+ "181": "I-X+X+PRON",
195
+ "182": "INTJ",
196
+ "183": "NOUN",
197
+ "184": "NOUN+AUX",
198
+ "185": "NOUN+PART",
199
+ "186": "NUM",
200
+ "187": "PART",
201
+ "188": "PRON",
202
+ "189": "PRON+AUX",
203
+ "190": "PRON+VERB",
204
+ "191": "PROPN",
205
+ "192": "PROPN+PART",
206
+ "193": "PUNCT",
207
+ "194": "PUNCT+PUNCT",
208
+ "195": "PUNCT+PUNCT+PUNCT",
209
+ "196": "PUNCT+SYM",
210
+ "197": "SCONJ",
211
+ "198": "SYM",
212
+ "199": "SYM+PUNCT",
213
+ "200": "SYM+SYM",
214
+ "201": "VERB",
215
+ "202": "VERB+ADP",
216
+ "203": "VERB+PART",
217
+ "204": "VERB+PRON",
218
+ "205": "VERB+VERB",
219
+ "206": "X",
220
+ "207": "X+X"
221
  },
222
  "initializer_range": 0.02,
223
  "intermediate_size": 3072,
224
  "label2id": {
225
  "ADJ": 0,
226
  "ADP": 1,
227
+ "ADP+DET": 2,
228
+ "ADP+PRON": 3,
229
+ "ADV": 4,
230
+ "ADV+AUX": 5,
231
+ "ADV+PART": 6,
232
+ "AUX": 7,
233
+ "AUX+PART": 8,
234
+ "B-ADJ": 9,
235
+ "B-ADJ+ADJ": 10,
236
+ "B-ADJ+NOUN": 11,
237
+ "B-ADJ+NOUN+NOUN": 12,
238
+ "B-ADJ+PART": 13,
239
+ "B-ADJ+PROPN": 14,
240
+ "B-ADJ+PUNCT": 15,
241
+ "B-ADP": 16,
242
+ "B-ADP+ADJ": 17,
243
+ "B-ADP+NOUN": 18,
244
+ "B-ADP+PRON": 19,
245
+ "B-ADV": 20,
246
+ "B-ADV+AUX": 21,
247
+ "B-ADV+PUNCT": 22,
248
+ "B-AUX": 23,
249
+ "B-AUX+ADV": 24,
250
+ "B-AUX+PART": 25,
251
+ "B-AUX+PART+VERB": 26,
252
+ "B-AUX+VERB": 27,
253
+ "B-CCONJ": 28,
254
+ "B-DET": 29,
255
+ "B-DET+AUX": 30,
256
+ "B-DET+NOUN": 31,
257
+ "B-INTJ": 32,
258
+ "B-INTJ+PUNCT": 33,
259
+ "B-NOUN": 34,
260
+ "B-NOUN+ADJ": 35,
261
+ "B-NOUN+ADP": 36,
262
+ "B-NOUN+AUX": 37,
263
+ "B-NOUN+NOUN": 38,
264
+ "B-NOUN+NOUN+VERB": 39,
265
+ "B-NOUN+PART": 40,
266
+ "B-NOUN+PROPN": 41,
267
+ "B-NOUN+PUNCT": 42,
268
+ "B-NOUN+SCONJ": 43,
269
+ "B-NOUN+VERB": 44,
270
+ "B-NUM": 45,
271
+ "B-PART": 46,
272
+ "B-PRON": 47,
273
+ "B-PRON+ADJ": 48,
274
+ "B-PRON+ADV": 49,
275
+ "B-PRON+AUX": 50,
276
+ "B-PRON+NOUN": 51,
277
+ "B-PRON+PART": 52,
278
+ "B-PRON+PRON": 53,
279
+ "B-PRON+VERB": 54,
280
+ "B-PROPN": 55,
281
+ "B-PROPN+ADP": 56,
282
+ "B-PROPN+AUX": 57,
283
+ "B-PROPN+PART": 58,
284
+ "B-PROPN+PROPN": 59,
285
+ "B-PROPN+PUNCT": 60,
286
+ "B-PROPN+PUNCT+PUNCT": 61,
287
+ "B-PROPN+VERB": 62,
288
+ "B-PUNCT": 63,
289
+ "B-PUNCT+PUNCT": 64,
290
+ "B-PUNCT+PUNCT+PUNCT": 65,
291
+ "B-PUNCT+SYM+PUNCT": 66,
292
+ "B-SCONJ": 67,
293
+ "B-SYM": 68,
294
+ "B-VERB": 69,
295
+ "B-VERB+ADJ": 70,
296
+ "B-VERB+ADJ+CCONJ": 71,
297
+ "B-VERB+ADP": 72,
298
+ "B-VERB+ADV": 73,
299
+ "B-VERB+ADV+PUNCT": 74,
300
+ "B-VERB+AUX": 75,
301
+ "B-VERB+CCONJ": 76,
302
+ "B-VERB+DET": 77,
303
+ "B-VERB+NOUN": 78,
304
+ "B-VERB+NOUN+CCONJ": 79,
305
+ "B-VERB+NOUN+NOUN": 80,
306
+ "B-VERB+PART": 81,
307
+ "B-VERB+PRON": 82,
308
+ "B-VERB+PRON+ADP": 83,
309
+ "B-VERB+PRON+ADV": 84,
310
+ "B-VERB+PROPN": 85,
311
+ "B-VERB+SCONJ": 86,
312
+ "B-VERB+VERB": 87,
313
+ "B-VERB+VERB+NOUN": 88,
314
+ "B-X": 89,
315
+ "B-X+PUNCT": 90,
316
+ "B-X+PUNCT+PUNCT": 91,
317
+ "B-X+X": 92,
318
+ "B-X+X+PRON": 93,
319
+ "CCONJ": 94,
320
+ "DET": 95,
321
+ "DET+NUM": 96,
322
+ "I-ADJ": 97,
323
+ "I-ADJ+ADJ": 98,
324
+ "I-ADJ+NOUN": 99,
325
+ "I-ADJ+NOUN+NOUN": 100,
326
+ "I-ADJ+PART": 101,
327
+ "I-ADJ+PROPN": 102,
328
+ "I-ADJ+PUNCT": 103,
329
+ "I-ADP": 104,
330
+ "I-ADP+ADJ": 105,
331
+ "I-ADP+NOUN": 106,
332
+ "I-ADP+PRON": 107,
333
+ "I-ADV": 108,
334
+ "I-ADV+AUX": 109,
335
+ "I-ADV+PUNCT": 110,
336
+ "I-AUX": 111,
337
+ "I-AUX+ADV": 112,
338
+ "I-AUX+PART": 113,
339
+ "I-AUX+PART+VERB": 114,
340
+ "I-AUX+VERB": 115,
341
+ "I-CCONJ": 116,
342
+ "I-DET": 117,
343
+ "I-DET+AUX": 118,
344
+ "I-DET+NOUN": 119,
345
+ "I-INTJ": 120,
346
+ "I-INTJ+PUNCT": 121,
347
+ "I-NOUN": 122,
348
+ "I-NOUN+ADJ": 123,
349
+ "I-NOUN+ADP": 124,
350
+ "I-NOUN+AUX": 125,
351
+ "I-NOUN+NOUN": 126,
352
+ "I-NOUN+NOUN+VERB": 127,
353
+ "I-NOUN+PART": 128,
354
+ "I-NOUN+PROPN": 129,
355
+ "I-NOUN+PUNCT": 130,
356
+ "I-NOUN+SCONJ": 131,
357
+ "I-NOUN+VERB": 132,
358
+ "I-NUM": 133,
359
+ "I-PART": 134,
360
+ "I-PRON": 135,
361
+ "I-PRON+ADJ": 136,
362
+ "I-PRON+ADV": 137,
363
+ "I-PRON+AUX": 138,
364
+ "I-PRON+NOUN": 139,
365
+ "I-PRON+PART": 140,
366
+ "I-PRON+PRON": 141,
367
+ "I-PRON+VERB": 142,
368
+ "I-PROPN": 143,
369
+ "I-PROPN+ADP": 144,
370
+ "I-PROPN+AUX": 145,
371
+ "I-PROPN+PART": 146,
372
+ "I-PROPN+PROPN": 147,
373
+ "I-PROPN+PUNCT": 148,
374
+ "I-PROPN+PUNCT+PUNCT": 149,
375
+ "I-PROPN+VERB": 150,
376
+ "I-PUNCT": 151,
377
+ "I-PUNCT+PUNCT": 152,
378
+ "I-PUNCT+PUNCT+PUNCT": 153,
379
+ "I-PUNCT+SYM+PUNCT": 154,
380
+ "I-SCONJ": 155,
381
+ "I-SYM": 156,
382
+ "I-VERB": 157,
383
+ "I-VERB+ADJ": 158,
384
+ "I-VERB+ADJ+CCONJ": 159,
385
+ "I-VERB+ADP": 160,
386
+ "I-VERB+ADV": 161,
387
+ "I-VERB+ADV+PUNCT": 162,
388
+ "I-VERB+AUX": 163,
389
+ "I-VERB+CCONJ": 164,
390
+ "I-VERB+DET": 165,
391
+ "I-VERB+NOUN": 166,
392
+ "I-VERB+NOUN+CCONJ": 167,
393
+ "I-VERB+NOUN+NOUN": 168,
394
+ "I-VERB+PART": 169,
395
+ "I-VERB+PRON": 170,
396
+ "I-VERB+PRON+ADP": 171,
397
+ "I-VERB+PRON+ADV": 172,
398
+ "I-VERB+PROPN": 173,
399
+ "I-VERB+SCONJ": 174,
400
+ "I-VERB+VERB": 175,
401
+ "I-VERB+VERB+NOUN": 176,
402
+ "I-X": 177,
403
+ "I-X+PUNCT": 178,
404
+ "I-X+PUNCT+PUNCT": 179,
405
+ "I-X+X": 180,
406
+ "I-X+X+PRON": 181,
407
+ "INTJ": 182,
408
+ "NOUN": 183,
409
+ "NOUN+AUX": 184,
410
+ "NOUN+PART": 185,
411
+ "NUM": 186,
412
+ "PART": 187,
413
+ "PRON": 188,
414
+ "PRON+AUX": 189,
415
+ "PRON+VERB": 190,
416
+ "PROPN": 191,
417
+ "PROPN+PART": 192,
418
+ "PUNCT": 193,
419
+ "PUNCT+PUNCT": 194,
420
+ "PUNCT+PUNCT+PUNCT": 195,
421
+ "PUNCT+SYM": 196,
422
+ "SCONJ": 197,
423
+ "SYM": 198,
424
+ "SYM+PUNCT": 199,
425
+ "SYM+SYM": 200,
426
+ "VERB": 201,
427
+ "VERB+ADP": 202,
428
+ "VERB+PART": 203,
429
+ "VERB+PRON": 204,
430
+ "VERB+VERB": 205,
431
+ "X": 206,
432
+ "X+X": 207
433
  },
434
  "layer_norm_eps": 1e-05,
435
  "max_position_embeddings": 514,
 
448
  "interestingsocial": [
449
  "interesting",
450
  "social"
451
+ ],
452
+ "longeight-inch": [
453
+ "long",
454
+ "eight-inch"
455
+ ],
456
+ "pressingsocial": [
457
+ "pressing",
458
+ "social"
459
  ]
460
  },
461
  "ADJ+NOUN": {
 
463
  "big",
464
  "source"
465
  ],
466
+ "contrastingseries": [
467
+ "contrasting",
468
+ "series"
469
+ ],
470
  "distractingelements": [
471
  "distracting",
472
  "elements"
473
  ],
474
+ "fascinatingshop": [
475
+ "fascinating",
476
+ "shop"
477
+ ],
478
  "gruelingsanctions": [
479
  "grueling",
480
  "sanctions"
481
  ],
482
+ "increasingsafety": [
483
+ "increasing",
484
+ "safety"
485
+ ],
486
  "longexposures": [
487
  "long",
488
  "exposures"
 
491
  "long",
492
  "hair"
493
  ],
494
+ "longhistory": [
495
+ "long",
496
+ "history"
497
+ ],
498
  "ongoingsummaries": [
499
  "ongoing",
500
  "summaries"
 
503
  "pre-meeting",
504
  "site"
505
  ],
506
+ "rallyingsigns": [
507
+ "rallying",
508
+ "signs"
509
+ ],
510
+ "revenue-raisingservices": [
511
+ "revenue-raising",
512
+ "services"
513
+ ],
514
+ "self-questioningshrug": [
515
+ "self-questioning",
516
+ "shrug"
517
+ ],
518
+ "simperingsmile": [
519
+ "simpering",
520
+ "smile"
521
+ ],
522
  "stronghints": [
523
  "strong",
524
  "hints"
525
+ ],
526
+ "wizardingsport": [
527
+ "wizarding",
528
+ "sport"
529
  ]
530
  },
531
  "ADJ+PART": {
 
541
  ]
542
  },
543
  "ADJ+PUNCT": {
544
+ "Jr..": [
545
+ "Jr.",
546
+ "."
547
+ ],
548
  "e.g.:": [
549
  "e.g.",
550
  ":"
 
554
  ","
555
  ]
556
  },
557
+ "ADP+DET": {
558
+ "des": [
559
+ "de",
560
+ "les"
561
+ ]
562
+ },
563
  "ADP+NOUN": {
564
  "Infact": [
565
  "In",
 
580
  "Heres": [
581
  "Here",
582
  "s"
583
+ ],
584
+ "longhave": [
585
+ "long",
586
+ "have"
587
  ]
588
  },
589
  "ADV+PART": {
 
597
  "E.g.",
598
  ","
599
  ],
600
+ "e.g.,": [
601
+ "e.g.",
602
+ ","
603
+ ],
604
  "i.e.,": [
605
  "i.e.",
606
  ","
 
610
  ":"
611
  ]
612
  },
613
+ "AUX+ADV": {
614
+ "cannot": [
615
+ "can",
616
+ "not"
617
+ ]
618
+ },
619
  "AUX+PART": {
620
  "Aren't": [
621
  "Are",
622
  "n't"
623
  ],
624
+ "Aren\u2019t": [
625
+ "Are",
626
+ "n\u2019t"
627
+ ],
628
  "CANT": [
629
  "CA",
630
  "NT"
 
637
  "Can",
638
  "not"
639
  ],
640
+ "Can\u2019t": [
641
+ "Ca",
642
+ "n\u2019t"
643
+ ],
644
  "DON'T": [
645
  "DO",
646
  "N'T"
 
657
  "Do",
658
  "nt"
659
  ],
660
+ "Don\u2019t": [
661
+ "Do",
662
+ "n\u2019t"
663
+ ],
664
  "Haven't": [
665
  "Have",
666
  "n't"
667
  ],
668
+ "Isn't": [
669
+ "Is",
670
+ "n't"
671
+ ],
672
+ "Isn\u2019t": [
673
+ "Is",
674
+ "n\u2019t"
675
+ ],
676
+ "Won't": [
677
+ "Wo",
678
+ "n't"
679
+ ],
680
  "ain't": [
681
  "ai",
682
  "n't"
 
687
  ],
688
  "aren't": [
689
  "are",
690
+ "not"
691
  ],
692
  "arent": [
693
  "are",
694
  "nt"
695
  ],
696
+ "aren\u2019t": [
697
+ "are",
698
+ "n\u2019t"
699
+ ],
700
  "can't": [
701
+ "can",
702
+ "not"
703
  ],
704
  "cannot": [
705
  "can",
 
725
  "does",
726
  "n't"
727
  ],
728
+ "doesn\u2019t": [
729
+ "does",
730
+ "n\u2019t"
731
+ ],
732
  "don't": [
733
  "do",
734
+ "not"
735
  ],
736
  "dont": [
737
  "do",
 
745
  "have",
746
  "n't"
747
  ],
748
+ "shan't": [
749
+ "sha",
750
+ "n't"
751
+ ],
752
+ "shouldn't": [
753
+ "should",
754
+ "not"
755
+ ],
756
  "wasent": [
757
  "wase",
758
  "nt"
 
766
  "n\u2019t"
767
  ],
768
  "won't": [
769
+ "will",
770
+ "not"
771
  ],
772
  "wont": [
773
  "wo",
 
842
  ]
843
  },
844
  "INTJ+PUNCT": {
845
+ "etc.'": [
846
+ "etc.",
847
+ "'"
848
+ ],
849
  "ta',": [
850
  "ta'",
851
  ","
852
  ]
853
  },
854
+ "NOUN+ADJ": {
855
+ "nothingspecial": [
856
+ "nothing",
857
+ "special"
858
+ ]
859
+ },
860
+ "NOUN+ADP": {
861
+ "flagon": [
862
+ "flag",
863
+ "on"
864
+ ],
865
+ "groundsof": [
866
+ "grounds",
867
+ "of"
868
+ ],
869
+ "hashtagon": [
870
+ "hashtag",
871
+ "on"
872
+ ],
873
+ "meetingsince": [
874
+ "meeting",
875
+ "since"
876
+ ]
877
+ },
878
  "NOUN+AUX": {
879
  "breathingshould": [
880
  "breathing",
 
883
  "doghas": [
884
  "dog",
885
  "has"
886
+ ],
887
+ "mythmakingshould": [
888
+ "mythmaking",
889
+ "should"
890
  ]
891
  },
892
  "NOUN+NOUN": {
 
894
  "Driving",
895
  "school"
896
  ],
897
+ "bakingsheet": [
898
+ "baking",
899
+ "sheet"
900
+ ],
901
+ "bakingsoda": [
902
+ "baking",
903
+ "soda"
904
+ ],
905
  "counselingservices": [
906
  "counseling",
907
  "services"
 
922
  "engineering",
923
  "services"
924
  ],
925
+ "eveningschedule": [
926
+ "evening",
927
+ "schedule"
928
+ ],
929
  "kingsnake": [
930
  "king",
931
  "snake"
 
938
  "lighting",
939
  "showroom"
940
  ],
941
+ "lightingsources": [
942
+ "lighting",
943
+ "sources"
944
+ ],
945
+ "loggingsites": [
946
+ "logging",
947
+ "sites"
948
+ ],
949
  "mpgnumber": [
950
  "mpg",
951
  "number"
952
  ],
953
+ "plughole": [
954
+ "plug",
955
+ "hole"
956
+ ],
957
+ "runningshorts": [
958
+ "running",
959
+ "shorts"
960
+ ],
961
+ "tagsets": [
962
+ "tag",
963
+ "sets"
964
+ ],
965
  "testingschedule": [
966
  "testing",
967
  "schedule"
 
969
  "towingservices": [
970
  "towing",
971
  "services"
972
+ ],
973
+ "trainingsession": [
974
+ "training",
975
+ "session"
976
+ ],
977
+ "writingschedule": [
978
+ "writing",
979
+ "schedule"
980
+ ],
981
+ "writingsystem": [
982
+ "writing",
983
+ "system"
984
  ]
985
  },
986
  "NOUN+NOUN+VERB": {
 
1043
  "cat",
1044
  "s"
1045
  ],
1046
+ "cycles": [
1047
+ "cycle",
1048
+ "s"
1049
+ ],
1050
  "dads": [
1051
  "dad",
1052
  "s"
 
1138
  "workers": [
1139
  "worker",
1140
  "s"
1141
+ ],
1142
+ "years": [
1143
+ "year",
1144
+ "s"
1145
  ]
1146
  },
1147
  "NOUN+PUNCT": {
1148
+ "Ed.:": [
1149
+ "Ed.",
1150
+ ":"
1151
+ ],
1152
  "Fax.(": [
1153
  "Fax.",
1154
  "("
 
1174
  ","
1175
  ]
1176
  },
1177
+ "NOUN+SCONJ": {
1178
+ "buildingsince": [
1179
+ "building",
1180
+ "since"
1181
+ ]
1182
+ },
1183
  "NOUN+VERB": {
1184
+ "dogeat": [
1185
+ "dog",
1186
+ "eat"
1187
+ ],
1188
+ "morningserves": [
1189
+ "morning",
1190
+ "serves"
1191
+ ],
1192
  "thingsounded": [
1193
  "thing",
1194
  "sounded"
 
1276
  "r"
1277
  ]
1278
  },
1279
+ "PRON+NOUN": {
1280
+ "alleconomy": [
1281
+ "all",
1282
+ "economy"
1283
+ ]
1284
+ },
1285
  "PRON+PART": {
1286
  "anyones": [
1287
  "anyone",
1288
  "s"
1289
  ]
1290
  },
1291
+ "PRON+PRON": {
1292
+ "everythingshe": [
1293
+ "everything",
1294
+ "she"
1295
+ ]
1296
+ },
1297
  "PRON+VERB": {
1298
  "Thats": [
1299
  "That",
 
1328
  "thank"
1329
  ]
1330
  },
1331
+ "PROPN+ADP": {
1332
+ "Pagin": [
1333
+ "Pag",
1334
+ "in"
1335
+ ],
1336
+ "Petersburgin": [
1337
+ "Petersburg",
1338
+ "in"
1339
+ ]
1340
+ },
1341
+ "PROPN+AUX": {
1342
+ "Hedwighad": [
1343
+ "Hedwig",
1344
+ "had"
1345
+ ]
1346
+ },
1347
  "PROPN+PART": {
1348
  "BJs": [
1349
  "BJ",
 
1409
  "B.",
1410
  ","
1411
  ],
1412
+ "B.A.\"": [
1413
+ "B.A.",
1414
+ "\""
1415
+ ],
1416
  "D.C.,": [
1417
  "D.C.",
1418
  ","
 
1450
  "-"
1451
  ]
1452
  },
1453
+ "PROPN+PUNCT+PUNCT": {
1454
+ "W.H.\",": [
1455
+ "W.H.",
1456
+ "\"",
1457
+ ","
1458
+ ]
1459
+ },
1460
+ "PROPN+VERB": {
1461
+ "Orglive": [
1462
+ "Org",
1463
+ "live"
1464
+ ],
1465
+ "Pagyelped": [
1466
+ "Pag",
1467
+ "yelped"
1468
+ ]
1469
+ },
1470
  "PUNCT+PUNCT": {
1471
  "!\"": [
1472
  "!",
1473
  "\""
1474
  ],
1475
+ "!'": [
1476
+ "!",
1477
+ "'"
1478
+ ],
1479
  "!)": [
1480
  "!",
1481
  ")"
1482
  ],
1483
+ "\"!": [
1484
+ "\"",
1485
+ "!"
1486
+ ],
1487
+ "\"\"": [
1488
+ "\"",
1489
+ "\""
1490
+ ],
1491
  "\"(": [
1492
  "\"",
1493
  "("
 
1504
  "\"",
1505
  "-"
1506
  ],
1507
+ "\".": [
1508
+ "\"",
1509
+ "."
1510
+ ],
1511
  "\"...": [
1512
  "\"",
1513
  "..."
 
1516
  "\"",
1517
  ":"
1518
  ],
1519
+ "\"[": [
1520
+ "\"",
1521
+ "["
1522
+ ],
1523
  "')": [
1524
  "'",
1525
  ")"
 
1536
  "(",
1537
  "\"\""
1538
  ],
1539
+ "('": [
1540
+ "(",
1541
+ "'"
1542
+ ],
1543
+ "((": [
1544
+ "(",
1545
+ "("
1546
+ ],
1547
+ "([": [
1548
+ "(",
1549
+ "["
1550
+ ],
1551
  ")\"": [
1552
  ")",
1553
  "\""
 
1556
  ")",
1557
  "("
1558
  ],
1559
+ "))": [
1560
+ ")",
1561
+ ")"
1562
+ ],
1563
  "),": [
1564
  ")",
1565
  ","
 
1604
  "-",
1605
  "\""
1606
  ],
1607
+ ".\"": [
1608
+ ".",
1609
+ "\""
1610
+ ],
1611
  ".'": [
1612
  ".",
1613
  "'"
1614
  ],
1615
+ "..": [
1616
+ ".",
1617
+ "."
1618
+ ],
1619
  "...\"": [
1620
  "...",
1621
  "\""
1622
  ],
1623
+ "....": [
1624
+ "...",
1625
+ "."
1626
+ ],
1627
  "?\"": [
1628
  "?",
1629
  "\""
1630
  ],
1631
+ "?'": [
1632
+ "?",
1633
+ "'"
1634
+ ],
1635
  "?)": [
1636
  "?",
1637
  ")"
 
1643
  "],": [
1644
  "]",
1645
  ","
1646
+ ],
1647
+ "];": [
1648
+ "]",
1649
+ ";"
1650
  ]
1651
  },
1652
  "PUNCT+PUNCT+PUNCT": {
 
1664
  "?",
1665
  ")",
1666
  ","
1667
+ ],
1668
+ "]),": [
1669
+ "]",
1670
+ ")",
1671
+ ","
1672
  ]
1673
  },
1674
  "PUNCT+SYM": {
 
1677
  "$"
1678
  ]
1679
  },
1680
+ "PUNCT+SYM+PUNCT": {
1681
+ "(%)": [
1682
+ "(",
1683
+ "%",
1684
+ ")"
1685
+ ]
1686
+ },
1687
  "SYM+PUNCT": {
1688
  "$,": [
1689
  "$",
 
1693
  "%",
1694
  ")"
1695
  ],
1696
+ "%,": [
1697
+ "%",
1698
+ ","
1699
+ ],
1700
  "-'": [
1701
  "-",
1702
  "'"
 
1709
  ]
1710
  },
1711
  "VERB+ADJ": {
1712
+ "alteringspecific": [
1713
+ "altering",
1714
+ "specific"
1715
+ ],
1716
  "doingshoddy": [
1717
  "doing",
1718
  "shoddy"
 
1721
  "facing",
1722
  "serious"
1723
  ],
1724
+ "legalizingsame": [
1725
+ "legalizing",
1726
+ "same"
1727
+ ],
1728
+ "mixinguppercase": [
1729
+ "mixing",
1730
+ "uppercase"
1731
+ ],
1732
+ "motivatingsyntactic": [
1733
+ "motivating",
1734
+ "syntactic"
1735
+ ],
1736
  "outsourcingspecial": [
1737
  "outsourcing",
1738
  "special"
 
1740
  "reinforcingsimilar": [
1741
  "reinforcing",
1742
  "similar"
1743
+ ],
1744
+ "showingsuperb": [
1745
+ "showing",
1746
+ "superb"
1747
+ ],
1748
+ "usingsimple": [
1749
+ "using",
1750
+ "simple"
1751
  ]
1752
  },
1753
  "VERB+ADJ+CCONJ": {
 
1778
  "investigating",
1779
  "since"
1780
  ],
1781
+ "login": [
1782
+ "log",
1783
+ "in"
1784
+ ],
1785
  "setup": [
1786
  "set",
1787
  "up"
1788
  ]
1789
  },
1790
  "VERB+ADV": {
1791
+ "advancingslowly": [
1792
+ "advancing",
1793
+ "slowly"
1794
+ ],
1795
+ "behavingsplendidly": [
1796
+ "behaving",
1797
+ "splendidly"
1798
+ ],
1799
+ "bucklingslightly": [
1800
+ "buckling",
1801
+ "slightly"
1802
+ ],
1803
+ "contributingsubstantially": [
1804
+ "contributing",
1805
+ "substantially"
1806
+ ],
1807
+ "exultingeverywhere": [
1808
+ "exulting",
1809
+ "everywhere"
1810
+ ],
1811
+ "includingspecifically": [
1812
+ "including",
1813
+ "specifically"
1814
+ ],
1815
+ "movingsouthward": [
1816
+ "moving",
1817
+ "southward"
1818
+ ],
1819
+ "proposingspecifically": [
1820
+ "proposing",
1821
+ "specifically"
1822
+ ],
1823
+ "scavengingseriously": [
1824
+ "scavenging",
1825
+ "seriously"
1826
+ ],
1827
+ "swellingslightly": [
1828
+ "swelling",
1829
+ "slightly"
1830
+ ],
1831
  "totalingsomewhere": [
1832
  "totaling",
1833
  "somewhere"
1834
+ ],
1835
+ "walkinguptown": [
1836
+ "walking",
1837
+ "uptown"
1838
+ ]
1839
+ },
1840
+ "VERB+ADV+PUNCT": {
1841
+ "studyinge.g.,": [
1842
+ "studying",
1843
+ "e.g.",
1844
+ ","
1845
+ ]
1846
+ },
1847
+ "VERB+AUX": {
1848
+ "Winningshall": [
1849
+ "Winning",
1850
+ "shall"
1851
+ ],
1852
+ "copyingshould": [
1853
+ "copying",
1854
+ "should"
1855
+ ]
1856
+ },
1857
+ "VERB+CCONJ": {
1858
+ "departingeither": [
1859
+ "departing",
1860
+ "either"
1861
  ]
1862
  },
1863
  "VERB+DET": {
1864
+ "basingsome": [
1865
+ "basing",
1866
+ "some"
1867
+ ],
1868
+ "demonstratingsuch": [
1869
+ "demonstrating",
1870
+ "such"
1871
+ ],
1872
  "discussingsome": [
1873
  "discussing",
1874
  "some"
 
1889
  "experiencing",
1890
  "some"
1891
  ],
1892
+ "finishingsome": [
1893
+ "finishing",
1894
+ "some"
1895
+ ],
1896
+ "hostingsome": [
1897
+ "hosting",
1898
+ "some"
1899
+ ],
1900
  "meetingeach": [
1901
  "meeting",
1902
  "each"
1903
  ],
1904
+ "playingsome": [
1905
+ "playing",
1906
+ "some"
1907
+ ],
1908
+ "rangeach": [
1909
+ "rang",
1910
+ "each"
1911
+ ],
1912
  "readingsome": [
1913
  "reading",
1914
  "some"
 
1920
  "replacingsome": [
1921
  "replacing",
1922
  "some"
1923
+ ],
1924
+ "spendingsome": [
1925
+ "spending",
1926
+ "some"
1927
+ ],
1928
+ "usingsome": [
1929
+ "using",
1930
+ "some"
1931
  ]
1932
  },
1933
  "VERB+NOUN": {
1934
+ "continuingsource": [
1935
+ "continuing",
1936
+ "source"
1937
+ ],
1938
+ "differingschedules": [
1939
+ "differing",
1940
+ "schedules"
1941
+ ],
1942
  "doingscissors": [
1943
  "doing",
1944
  "scissors"
1945
  ],
1946
+ "expandingsystem": [
1947
+ "expanding",
1948
+ "system"
1949
+ ],
1950
+ "expressingsadness": [
1951
+ "expressing",
1952
+ "sadness"
1953
+ ],
1954
  "followingsuggestion": [
1955
  "following",
1956
  "suggestion"
 
1959
  "forming",
1960
  "eggs"
1961
  ],
1962
+ "gettingsavage": [
1963
+ "getting",
1964
+ "savage"
1965
+ ],
1966
+ "gleamingsand": [
1967
+ "gleaming",
1968
+ "sand"
1969
+ ],
1970
+ "improvingsurveillance": [
1971
+ "improving",
1972
+ "surveillance"
1973
+ ],
1974
  "meaningshell": [
1975
  "meaning",
1976
  "shell"
 
1979
  "playing",
1980
  "sports"
1981
  ],
1982
+ "printingerrors": [
1983
+ "printing",
1984
+ "errors"
1985
+ ],
1986
  "producingshrubs": [
1987
  "producing",
1988
  "shrubs"
 
1994
  "quittingsmoking": [
1995
  "quitting",
1996
  "smoking"
1997
+ ],
1998
+ "rushingslipstream": [
1999
+ "rushing",
2000
+ "slipstream"
2001
+ ],
2002
+ "seeingsomeone": [
2003
+ "seeing",
2004
+ "someone"
2005
+ ],
2006
+ "studyingsymmetry": [
2007
+ "studying",
2008
+ "symmetry"
2009
  ]
2010
  },
2011
  "VERB+PART": {
2012
+ "Gonna": [
2013
+ "Gon",
2014
+ "na"
2015
+ ],
2016
  "Gotta": [
2017
  "Got",
2018
  "ta"
 
2021
  "are",
2022
  "n't"
2023
  ],
2024
+ "didn't": [
2025
+ "did",
2026
+ "n't"
2027
+ ],
2028
  "doesn't": [
2029
  "does",
2030
  "n't"
 
2033
  "do",
2034
  "n't"
2035
  ],
2036
+ "don\u2019t": [
2037
+ "do",
2038
+ "n\u2019t"
2039
+ ],
2040
  "gonna": [
2041
  "gon",
2042
  "na"
 
2045
  "got",
2046
  "ta"
2047
  ],
2048
+ "haven't": [
2049
+ "have",
2050
+ "n't"
2051
+ ],
2052
  "wana": [
2053
  "wan",
2054
  "a"
 
2067
  "call",
2068
  "you"
2069
  ],
2070
+ "crossingeach": [
2071
+ "crossing",
2072
+ "each"
2073
+ ],
2074
  "doingeverything": [
2075
  "doing",
2076
  "everything"
2077
  ],
2078
+ "expectingsomeone": [
2079
+ "expecting",
2080
+ "someone"
2081
+ ],
2082
  "lets": [
2083
  "let",
2084
  "s"
2085
+ ],
2086
+ "slunghis": [
2087
+ "slung",
2088
+ "his"
2089
+ ]
2090
+ },
2091
+ "VERB+PRON+ADP": {
2092
+ "seeingeverythingaround": [
2093
+ "seeing",
2094
+ "everything",
2095
+ "around"
2096
+ ]
2097
+ },
2098
+ "VERB+PRON+ADV": {
2099
+ "screwingeverythingup": [
2100
+ "screwing",
2101
+ "everything",
2102
+ "up"
2103
+ ]
2104
+ },
2105
+ "VERB+PROPN": {
2106
+ "arrivingsalt": [
2107
+ "arriving",
2108
+ "salt"
2109
+ ],
2110
+ "departingsan": [
2111
+ "departing",
2112
+ "san"
2113
+ ],
2114
+ "leavingsan": [
2115
+ "leaving",
2116
+ "san"
2117
+ ],
2118
+ "leavingsunday": [
2119
+ "leaving",
2120
+ "sunday"
2121
  ]
2122
  },
2123
  "VERB+SCONJ": {
 
2126
  "whether"
2127
  ]
2128
  },
2129
+ "VERB+VERB": {
2130
+ "growingsuspended": [
2131
+ "growing",
2132
+ "suspended"
2133
+ ],
2134
+ "had": [
2135
+ "h",
2136
+ "ad"
2137
+ ]
2138
+ },
2139
+ "VERB+VERB+NOUN": {
2140
+ "crushingsleepingflowers": [
2141
+ "crushing",
2142
+ "sleeping",
2143
+ "flowers"
2144
+ ],
2145
+ "hostingvisitingschool": [
2146
+ "hosting",
2147
+ "visiting",
2148
+ "school"
2149
+ ]
2150
+ },
2151
  "X+PUNCT": {
2152
+ "al.,": [
2153
+ "al.",
2154
+ ","
2155
+ ],
2156
+ "e.g.,": [
2157
+ "e.g.",
2158
+ ","
2159
+ ],
2160
  "etc.)": [
2161
  "etc.",
2162
  ")"
 
2171
  ]
2172
  },
2173
  "X+X": {
2174
+ "'s": [
2175
+ "'",
2176
+ "s"
2177
+ ],
2178
  ").doc": [
2179
  ")",
2180
  ".doc"
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ad92b564b4a62c21032ff9e63d6e0822f95d94fe8e028707dd480f7cce0bc15
3
- size 496790863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdb22844ce42bcbc71c944f9c0b131b13a5a781104fc0b6e045d992b72c605e4
3
+ size 496947789
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5166b493ed67370b1ee21ce283c31e3b145d34723e84fdb612a77e3665f0e53b
3
- size 549334503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc86ad36997a67f073558c5b56ee902bc58917f05384d7b5134b2b40665559b
3
+ size 551806452