|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<start>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<end>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "<pad>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": false, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"post_processor": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": false, |
|
"use_regex": true |
|
}, |
|
"decoder": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": null, |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"vocab": { |
|
"<start>": 0, |
|
"<end>": 1, |
|
"<pad>": 2, |
|
"-": 3, |
|
".": 4, |
|
"1": 5, |
|
"2": 6, |
|
"3": 7, |
|
"<": 8, |
|
">": 9, |
|
"A": 10, |
|
"B": 11, |
|
"C": 12, |
|
"D": 13, |
|
"E": 14, |
|
"F": 15, |
|
"G": 16, |
|
"I": 17, |
|
"J": 18, |
|
"L": 19, |
|
"M": 20, |
|
"N": 21, |
|
"O": 22, |
|
"P": 23, |
|
"R": 24, |
|
"S": 25, |
|
"T": 26, |
|
"U": 27, |
|
"V": 28, |
|
"W": 29, |
|
"X": 30, |
|
"Z": 31, |
|
"a": 32, |
|
"b": 33, |
|
"c": 34, |
|
"d": 35, |
|
"e": 36, |
|
"f": 37, |
|
"g": 38, |
|
"h": 39, |
|
"i": 40, |
|
"k": 41, |
|
"l": 42, |
|
"m": 43, |
|
"n": 44, |
|
"o": 45, |
|
"p": 46, |
|
"r": 47, |
|
"s": 48, |
|
"t": 49, |
|
"u": 50, |
|
"v": 51, |
|
"w": 52, |
|
"y": 53, |
|
"Ġ": 54, |
|
"ar": 55, |
|
"nd": 56, |
|
"st": 57, |
|
"art": 58, |
|
"end": 59, |
|
"Ġ<": 60, |
|
"start": 61, |
|
"CN": 62, |
|
"II": 63, |
|
"ĠC": 64, |
|
"CNJ": 65, |
|
"ĠCCNJ": 66, |
|
"SG": 67, |
|
"ĠL": 68, |
|
"OC": 69, |
|
"ou": 70, |
|
"ĠI": 71, |
|
"on": 72, |
|
"PL": 73, |
|
"ĠP": 74, |
|
"ĠLOC": 75, |
|
"ho": 76, |
|
"Ġg": 77, |
|
"Ġs": 78, |
|
"in": 79, |
|
"AS": 80, |
|
"Ġgo": 81, |
|
"ĠN": 82, |
|
"ĠD": 83, |
|
"Ġw": 84, |
|
"ĠPR": 85, |
|
"Ġt": 86, |
|
"ĠIN": 87, |
|
"Ġ1": 88, |
|
"Ġp": 89, |
|
"SP": 90, |
|
"MP": 91, |
|
"OMP": 92, |
|
"ĠCOMP": 93, |
|
"Ġh": 94, |
|
"TR": 95, |
|
"EP": 96, |
|
"le": 97, |
|
"ck": 98, |
|
"Ġl": 99, |
|
"OSP": 100, |
|
"ĠPROSP": 101, |
|
"ke": 102, |
|
"ll": 103, |
|
"se": 104, |
|
"ID": 105, |
|
"re": 106, |
|
"Ġar": 107, |
|
"VB": 108, |
|
"ĠLVB": 109, |
|
"ay": 110, |
|
"Ġin": 111, |
|
"out": 112, |
|
"CCNJ": 113, |
|
"or": 114, |
|
"CEP": 115, |
|
"ĠINCEP": 116, |
|
"EG": 117, |
|
"FOC": 118, |
|
"te": 119, |
|
"ĠNEG": 120, |
|
"ake": 121, |
|
"ound": 122, |
|
"Ġaround": 123, |
|
"WID": 124, |
|
"Ġf": 125, |
|
"ĠDWID": 126, |
|
"Ġon": 127, |
|
"PAS": 128, |
|
"me": 129, |
|
"PASS": 130, |
|
"an": 131, |
|
"ee": 132, |
|
"pe": 133, |
|
"EAS": 134, |
|
"REAS": 135, |
|
"Ġb": 136, |
|
"ĠREAS": 137, |
|
"ouse": 138, |
|
"Ġpi": 139, |
|
"ĠS": 140, |
|
"co": 141, |
|
"LZ": 142, |
|
"MLZ": 143, |
|
"ĠNMLZ": 144, |
|
"ork": 145, |
|
"it": 146, |
|
"Ġm": 147, |
|
"BL": 148, |
|
"FV": 149, |
|
"OBL": 150, |
|
"PFV": 151, |
|
"ĠIPFV": 152, |
|
"AN": 153, |
|
"mp": 154, |
|
"to": 155, |
|
"Ġwho": 156, |
|
"Ġhouse": 157, |
|
"Ġpipe": 158, |
|
"comp": 159, |
|
"Ġhit": 160, |
|
"ac": 161, |
|
"ain": 162, |
|
"all": 163, |
|
"Ġtr": 164, |
|
"ĠINS": 165, |
|
"ear": 166, |
|
"ow": 167, |
|
"oll": 168, |
|
"Ġtake": 169, |
|
"Ġfoll": 170, |
|
"Ġfollow": 171, |
|
"RR": 172, |
|
"Ġout": 173, |
|
"AU": 174, |
|
"ĠOBL": 175, |
|
"AUS": 176, |
|
"ri": 177, |
|
"ong": 178, |
|
"SX": 179, |
|
"ack": 180, |
|
"lm": 181, |
|
"ve": 182, |
|
"Ġo": 183, |
|
"holm": 184, |
|
"Ġsee": 185, |
|
"ckholm": 186, |
|
"ĠSto": 187, |
|
"ĠStockholm": 188, |
|
"CAUS": 189, |
|
"IP": 190, |
|
"TIP": 191, |
|
"ly": 192, |
|
"od": 193, |
|
"par": 194, |
|
"Ġcomp": 195, |
|
"Ġgood": 196, |
|
"Ġwork": 197, |
|
"Ġth": 198, |
|
"lete": 199, |
|
"ANTIP": 200, |
|
"Ġcomplete": 201, |
|
"Ġcompletely": 202, |
|
"ER": 203, |
|
"VER": 204, |
|
"ĠVER": 205, |
|
"Ġsay": 206, |
|
"DM": 207, |
|
"IS": 208, |
|
"he": 209, |
|
"ir": 210, |
|
"rn": 211, |
|
"rt": 212, |
|
"tu": 213, |
|
"Ġ3": 214, |
|
"III": 215, |
|
"hort": 216, |
|
"Ġshort": 217, |
|
"Ġli": 218, |
|
"Ġmake": 219, |
|
"Ġtry": 220, |
|
"turn": 221, |
|
"ab": 222, |
|
"un": 223, |
|
"Ġlay": 224, |
|
"able": 225, |
|
"AR": 226, |
|
"ca": 227, |
|
"do": 228, |
|
"way": 229, |
|
"Ġdo": 230, |
|
"ĠIRR": 231, |
|
"ĠPAR": 232, |
|
"SPT": 233, |
|
"Ġdoor": 234, |
|
"ĠPART": 235, |
|
"DE": 236, |
|
"OX": 237, |
|
"PR": 238, |
|
"work": 239, |
|
"Ġre": 240, |
|
"home": 241, |
|
"Ġwh": 242, |
|
"DEM": 243, |
|
"PROX": 244, |
|
"ate": 245, |
|
"no": 246, |
|
"so": 247, |
|
"Ġno": 248, |
|
"rive": 249, |
|
"Ġnot": 250, |
|
"Ġlong": 251, |
|
"Ġlack": 252, |
|
"PN": 253, |
|
"AX": 254, |
|
"EL": 255, |
|
"EV": 256, |
|
"IRR": 257, |
|
"MAN": 258, |
|
"ad": 259, |
|
"ag": 260, |
|
"at": 261, |
|
"con": 262, |
|
"de": 263, |
|
"ike": 264, |
|
"ian": 265, |
|
"ide": 266, |
|
"long": 267, |
|
"like": 268, |
|
"mall": 269, |
|
"os": 270, |
|
"ole": 271, |
|
"pre": 272, |
|
"pouse": 273, |
|
"ros": 274, |
|
"side": 275, |
|
"tin": 276, |
|
"uall": 277, |
|
"year": 278, |
|
"ĠCN": 279, |
|
"Ġho": 280, |
|
"Ġac": 281, |
|
"ĠAX": 282, |
|
"Ġcon": 283, |
|
"Ġyear": 284, |
|
"ĠCan": 285, |
|
"ĠPCNJ": 286, |
|
"Ġgir": 287, |
|
"Ġsmall": 288, |
|
"Ġspouse": 289, |
|
"ĠDM": 290, |
|
"ĠDIS": 291, |
|
"Ġwee": 292, |
|
"Ġpee": 293, |
|
"Ġpole": 294, |
|
"ree": 295, |
|
"rest": 296, |
|
"Ġinside": 297, |
|
"any": 298, |
|
"Ġpick": 299, |
|
"ĠSEL": 300, |
|
"Ġman": 301, |
|
"company": 302, |
|
"acros": 303, |
|
"Ġtrain": 304, |
|
"pare": 305, |
|
"Ġthree": 306, |
|
"heart": 307, |
|
"Ġlie": 308, |
|
"case": 309, |
|
"Ġreturn": 310, |
|
"Ġwhat": 311, |
|
"EVID": 312, |
|
"MANR": 313, |
|
"adian": 314, |
|
"again": 315, |
|
"prepare": 316, |
|
"tinuall": 317, |
|
"ĠCNTR": 318, |
|
"Ġhole": 319, |
|
"Ġaccompany": 320, |
|
"Ġcontinuall": 321, |
|
"ĠCanadian": 322, |
|
"Ġgirl": 323, |
|
"ĠDISTR": 324, |
|
"Ġweek": 325, |
|
"ĠSELF": 326, |
|
"across": 327, |
|
"Ġcontinually": 328, |
|
"ES": 329, |
|
"ak": 330, |
|
"eri": 331, |
|
"epar": 332, |
|
"gh": 333, |
|
"ig": 334, |
|
"ind": 335, |
|
"Ġun": 336, |
|
"ough": 337, |
|
"Ġsepar": 338, |
|
"ĠDES": 339, |
|
"Ġperi": 340, |
|
"Ġhear": 341, |
|
"reak": 342, |
|
"Ġarrive": 343, |
|
"ter": 344, |
|
"Ġfind": 345, |
|
"Ġone": 346, |
|
"meter": 347, |
|
"Ġback": 348, |
|
"Ġbig": 349, |
|
"Ġbreak": 350, |
|
"Ġoh": 351, |
|
"Ġthough": 352, |
|
"Ġunable": 353, |
|
"Ġseparate": 354, |
|
"Ġperimeter": 355, |
|
"Ġthought": 356, |
|
"fir": 357, |
|
"ime": 358, |
|
"lo": 359, |
|
"pain": 360, |
|
"run": 361, |
|
"the": 362, |
|
"time": 363, |
|
"Ġco": 364, |
|
"Ġall": 365, |
|
"ĠSPT": 366, |
|
"Ġrun": 367, |
|
"ĠPREP": 368, |
|
"EPIS": 369, |
|
"Ġfear": 370, |
|
"pen": 371, |
|
"Ġblo": 372, |
|
"ĠSpain": 373, |
|
"Ġopen": 374, |
|
"Ġlive": 375, |
|
"first": 376, |
|
"Ġcome": 377, |
|
"Ġblock": 378, |
|
"eca": 379, |
|
"use": 380, |
|
"Ġbeca": 381, |
|
"Ġbecause": 382, |
|
"AL": 383, |
|
"BM": 384, |
|
"VAL": 385, |
|
"ai": 386, |
|
"as": 387, |
|
"ame": 388, |
|
"ce": 389, |
|
"en": 390, |
|
"ep": 391, |
|
"ff": 392, |
|
"ite": 393, |
|
"ice": 394, |
|
"lac": 395, |
|
"mar": 396, |
|
"name": 397, |
|
"prive": 398, |
|
"rs": 399, |
|
"sit": 400, |
|
"uprive": 401, |
|
"was": 402, |
|
"ĠCAUS": 403, |
|
"ĠIBM": 404, |
|
"Ġsle": 405, |
|
"ĠDen": 406, |
|
"Ġwate": 407, |
|
"Ġplac": 408, |
|
"pers": 409, |
|
"Ġmai": 410, |
|
"ach": 411, |
|
"Ġoff": 412, |
|
"Ġreach": 413, |
|
"Ġwhite": 414, |
|
"mark": 415, |
|
"upriver": 416, |
|
"wash": 417, |
|
"Ġsleep": 418, |
|
"ĠDenmark": 419, |
|
"Ġwater": 420, |
|
"Ġplace": 421, |
|
"person": 422, |
|
"Ġmail": 423, |
|
"Ġoffice": 424 |
|
}, |
|
"merges": [ |
|
"a r", |
|
"n d", |
|
"s t", |
|
"ar t", |
|
"e nd", |
|
"Ġ <", |
|
"st art", |
|
"C N", |
|
"I I", |
|
"Ġ C", |
|
"CN J", |
|
"ĠC CNJ", |
|
"S G", |
|
"Ġ L", |
|
"O C", |
|
"o u", |
|
"Ġ I", |
|
"o n", |
|
"P L", |
|
"Ġ P", |
|
"ĠL OC", |
|
"h o", |
|
"Ġ g", |
|
"Ġ s", |
|
"i n", |
|
"A S", |
|
"Ġg o", |
|
"Ġ N", |
|
"Ġ D", |
|
"Ġ w", |
|
"ĠP R", |
|
"Ġ t", |
|
"ĠI N", |
|
"Ġ 1", |
|
"Ġ p", |
|
"S P", |
|
"M P", |
|
"O MP", |
|
"ĠC OMP", |
|
"Ġ h", |
|
"T R", |
|
"E P", |
|
"l e", |
|
"c k", |
|
"Ġ l", |
|
"O SP", |
|
"ĠPR OSP", |
|
"k e", |
|
"l l", |
|
"s e", |
|
"I D", |
|
"r e", |
|
"Ġ ar", |
|
"V B", |
|
"ĠL VB", |
|
"a y", |
|
"Ġ in", |
|
"ou t", |
|
"C CNJ", |
|
"o r", |
|
"C EP", |
|
"ĠIN CEP", |
|
"E G", |
|
"F OC", |
|
"t e", |
|
"ĠN EG", |
|
"a ke", |
|
"ou nd", |
|
"Ġar ound", |
|
"W ID", |
|
"Ġ f", |
|
"ĠD WID", |
|
"Ġ on", |
|
"P AS", |
|
"m e", |
|
"PAS S", |
|
"a n", |
|
"e e", |
|
"p e", |
|
"E AS", |
|
"R EAS", |
|
"Ġ b", |
|
"Ġ REAS", |
|
"ou se", |
|
"Ġp i", |
|
"Ġ S", |
|
"c o", |
|
"L Z", |
|
"M LZ", |
|
"ĠN MLZ", |
|
"or k", |
|
"i t", |
|
"Ġ m", |
|
"B L", |
|
"F V", |
|
"O BL", |
|
"P FV", |
|
"ĠI PFV", |
|
"A N", |
|
"m p", |
|
"t o", |
|
"Ġw ho", |
|
"Ġh ouse", |
|
"Ġpi pe", |
|
"co mp", |
|
"Ġh it", |
|
"a c", |
|
"a in", |
|
"a ll", |
|
"Ġt r", |
|
"ĠIN S", |
|
"e ar", |
|
"o w", |
|
"o ll", |
|
"Ġt ake", |
|
"Ġf oll", |
|
"Ġfoll ow", |
|
"R R", |
|
"Ġ out", |
|
"A U", |
|
"Ġ OBL", |
|
"AU S", |
|
"r i", |
|
"on g", |
|
"S X", |
|
"a ck", |
|
"l m", |
|
"v e", |
|
"Ġ o", |
|
"ho lm", |
|
"Ġs ee", |
|
"ck holm", |
|
"ĠS to", |
|
"ĠSto ckholm", |
|
"C AUS", |
|
"I P", |
|
"T IP", |
|
"l y", |
|
"o d", |
|
"p ar", |
|
"Ġ comp", |
|
"Ġgo od", |
|
"Ġw ork", |
|
"Ġt h", |
|
"le te", |
|
"AN TIP", |
|
"Ġcomp lete", |
|
"Ġcomplete ly", |
|
"E R", |
|
"V ER", |
|
"Ġ VER", |
|
"Ġs ay", |
|
"D M", |
|
"I S", |
|
"h e", |
|
"i r", |
|
"r n", |
|
"r t", |
|
"t u", |
|
"Ġ 3", |
|
"II I", |
|
"ho rt", |
|
"Ġs hort", |
|
"Ġl i", |
|
"Ġm ake", |
|
"Ġtr y", |
|
"tu rn", |
|
"a b", |
|
"u n", |
|
"Ġl ay", |
|
"ab le", |
|
"A R", |
|
"c a", |
|
"d o", |
|
"w ay", |
|
"Ġ do", |
|
"ĠI RR", |
|
"ĠP AR", |
|
"SP T", |
|
"Ġdo or", |
|
"ĠPAR T", |
|
"D E", |
|
"O X", |
|
"P R", |
|
"w ork", |
|
"Ġ re", |
|
"ho me", |
|
"Ġw h", |
|
"DE M", |
|
"PR OX", |
|
"a te", |
|
"n o", |
|
"s o", |
|
"Ġ no", |
|
"ri ve", |
|
"Ġno t", |
|
"Ġl ong", |
|
"Ġl ack", |
|
"P N", |
|
"A X", |
|
"E L", |
|
"E V", |
|
"I RR", |
|
"M AN", |
|
"a d", |
|
"a g", |
|
"a t", |
|
"c on", |
|
"d e", |
|
"i ke", |
|
"i an", |
|
"i de", |
|
"l ong", |
|
"l ike", |
|
"m all", |
|
"o s", |
|
"o le", |
|
"p re", |
|
"p ouse", |
|
"r os", |
|
"s ide", |
|
"t in", |
|
"u all", |
|
"y ear", |
|
"Ġ CN", |
|
"Ġ ho", |
|
"Ġ ac", |
|
"Ġ AX", |
|
"Ġ con", |
|
"Ġ year", |
|
"ĠC an", |
|
"ĠP CNJ", |
|
"Ġg ir", |
|
"Ġs mall", |
|
"Ġs pouse", |
|
"ĠD M", |
|
"ĠD IS", |
|
"Ġw ee", |
|
"Ġp ee", |
|
"Ġp ole", |
|
"re e", |
|
"re st", |
|
"Ġin side", |
|
"an y", |
|
"Ġpi ck", |
|
"ĠS EL", |
|
"Ġm an", |
|
"comp any", |
|
"ac ros", |
|
"Ġtr ain", |
|
"par e", |
|
"Ġth ree", |
|
"he art", |
|
"Ġli e", |
|
"ca se", |
|
"Ġre turn", |
|
"Ġwh at", |
|
"EV ID", |
|
"MAN R", |
|
"ad ian", |
|
"ag ain", |
|
"pre pare", |
|
"tin uall", |
|
"ĠCN TR", |
|
"Ġho le", |
|
"Ġac company", |
|
"Ġcon tinuall", |
|
"ĠCan adian", |
|
"Ġgir l", |
|
"ĠDIS TR", |
|
"Ġwee k", |
|
"ĠSEL F", |
|
"acros s", |
|
"Ġcontinuall y", |
|
"E S", |
|
"a k", |
|
"e ri", |
|
"e par", |
|
"g h", |
|
"i g", |
|
"i nd", |
|
"Ġ un", |
|
"ou gh", |
|
"Ġs epar", |
|
"ĠD ES", |
|
"Ġp eri", |
|
"Ġh ear", |
|
"re ak", |
|
"Ġar rive", |
|
"te r", |
|
"Ġf ind", |
|
"Ġon e", |
|
"me ter", |
|
"Ġb ack", |
|
"Ġb ig", |
|
"Ġb reak", |
|
"Ġo h", |
|
"Ġth ough", |
|
"Ġun able", |
|
"Ġsepar ate", |
|
"Ġperi meter", |
|
"Ġthough t", |
|
"f ir", |
|
"i me", |
|
"l o", |
|
"p ain", |
|
"r un", |
|
"t he", |
|
"t ime", |
|
"Ġ co", |
|
"Ġ all", |
|
"Ġ SPT", |
|
"Ġ run", |
|
"ĠPR EP", |
|
"EP IS", |
|
"Ġf ear", |
|
"pe n", |
|
"Ġb lo", |
|
"ĠS pain", |
|
"Ġo pen", |
|
"Ġli ve", |
|
"fir st", |
|
"Ġco me", |
|
"Ġblo ck", |
|
"e ca", |
|
"u se", |
|
"Ġb eca", |
|
"Ġbeca use", |
|
"A L", |
|
"B M", |
|
"V AL", |
|
"a i", |
|
"a s", |
|
"a me", |
|
"c e", |
|
"e n", |
|
"e p", |
|
"f f", |
|
"i te", |
|
"i ce", |
|
"l ac", |
|
"m ar", |
|
"n ame", |
|
"p rive", |
|
"r s", |
|
"s it", |
|
"u prive", |
|
"w as", |
|
"ĠC AUS", |
|
"ĠI BM", |
|
"Ġs le", |
|
"ĠD en", |
|
"Ġw ate", |
|
"Ġp lac", |
|
"pe rs", |
|
"Ġm ai", |
|
"ac h", |
|
"Ġo ff", |
|
"Ġre ach", |
|
"Ġwh ite", |
|
"mar k", |
|
"uprive r", |
|
"was h", |
|
"Ġsle ep", |
|
"ĠDen mark", |
|
"Ġwate r", |
|
"Ġplac e", |
|
"pers on", |
|
"Ġmai l", |
|
"Ġoff ice" |
|
] |
|
} |
|
} |