Xiangtai committed on
Commit
1059a41
·
verified ·
1 Parent(s): ec26f7a

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. added_tokens.json +33 -12
  2. config.json +4 -4
  3. special_tokens_map.json +11 -0
  4. tokenizer.json +296 -11
  5. tokenizer_config.json +193 -13
added_tokens.json CHANGED
@@ -1,17 +1,38 @@
1
  {
2
- "</box>": 151654,
3
- "</img>": 151647,
4
- "</p>": 151657,
5
- "</quad>": 151650,
6
- "</ref>": 151652,
7
- "<IMG_CONTEXT>": 151648,
8
- "<box>": 151653,
9
- "<img>": 151646,
10
- "<p>": 151656,
11
- "<quad>": 151649,
12
- "<ref>": 151651,
 
 
 
 
 
 
13
  "<|endoftext|>": 151643,
 
 
 
 
 
14
  "<|im_end|>": 151645,
15
  "<|im_start|>": 151644,
16
- "[SEG]": 151655
 
 
 
 
 
 
 
 
 
 
17
  }
 
1
  {
2
+ "</box>": 151673,
3
+ "</img>": 151666,
4
+ "</p>": 151676,
5
+ "</quad>": 151669,
6
+ "</ref>": 151671,
7
+ "</tool_call>": 151658,
8
+ "</vp>": 151678,
9
+ "<IMG_CONTEXT>": 151667,
10
+ "<box>": 151672,
11
+ "<img>": 151665,
12
+ "<p>": 151675,
13
+ "<quad>": 151668,
14
+ "<ref>": 151670,
15
+ "<tool_call>": 151657,
16
+ "<vp>": 151677,
17
+ "<|box_end|>": 151649,
18
+ "<|box_start|>": 151648,
19
  "<|endoftext|>": 151643,
20
+ "<|file_sep|>": 151664,
21
+ "<|fim_middle|>": 151660,
22
+ "<|fim_pad|>": 151662,
23
+ "<|fim_prefix|>": 151659,
24
+ "<|fim_suffix|>": 151661,
25
  "<|im_end|>": 151645,
26
  "<|im_start|>": 151644,
27
+ "<|image_pad|>": 151655,
28
+ "<|object_ref_end|>": 151647,
29
+ "<|object_ref_start|>": 151646,
30
+ "<|quad_end|>": 151651,
31
+ "<|quad_start|>": 151650,
32
+ "<|repo_name|>": 151663,
33
+ "<|video_pad|>": 151656,
34
+ "<|vision_end|>": 151653,
35
+ "<|vision_pad|>": 151654,
36
+ "<|vision_start|>": 151652,
37
+ "[SEG]": 151674
38
  }
config.json CHANGED
@@ -13,7 +13,7 @@
13
  "force_image_size": 448,
14
  "hidden_size": 896,
15
  "llm_config": {
16
- "_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
17
  "add_cross_attention": false,
18
  "architectures": [
19
  "Qwen2ForCausalLM"
@@ -51,7 +51,7 @@
51
  "length_penalty": 1.0,
52
  "max_length": 20,
53
  "max_position_embeddings": 32768,
54
- "max_window_layers": 24,
55
  "min_length": 0,
56
  "model_type": "qwen2",
57
  "no_repeat_ngram_size": 0,
@@ -81,7 +81,7 @@
81
  "temperature": 1.0,
82
  "tf_legacy_loss": false,
83
  "tie_encoder_decoder": false,
84
- "tie_word_embeddings": true,
85
  "tokenizer_class": null,
86
  "top_k": 50,
87
  "top_p": 1.0,
@@ -92,7 +92,7 @@
92
  "use_bfloat16": true,
93
  "use_cache": true,
94
  "use_sliding_window": false,
95
- "vocab_size": 151658
96
  },
97
  "max_dynamic_patch": 12,
98
  "min_dynamic_patch": 1,
 
13
  "force_image_size": 448,
14
  "hidden_size": 896,
15
  "llm_config": {
16
+ "_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
17
  "add_cross_attention": false,
18
  "architectures": [
19
  "Qwen2ForCausalLM"
 
51
  "length_penalty": 1.0,
52
  "max_length": 20,
53
  "max_position_embeddings": 32768,
54
+ "max_window_layers": 21,
55
  "min_length": 0,
56
  "model_type": "qwen2",
57
  "no_repeat_ngram_size": 0,
 
81
  "temperature": 1.0,
82
  "tf_legacy_loss": false,
83
  "tie_encoder_decoder": false,
84
+ "tie_word_embeddings": false,
85
  "tokenizer_class": null,
86
  "top_k": 50,
87
  "top_p": 1.0,
 
92
  "use_bfloat16": true,
93
  "use_cache": true,
94
  "use_sliding_window": false,
95
+ "vocab_size": 151679
96
  },
97
  "max_dynamic_patch": 12,
98
  "min_dynamic_patch": 1,
special_tokens_map.json CHANGED
@@ -2,6 +2,17 @@
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
  "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
5
  "<img>",
6
  "</img>",
7
  "<IMG_CONTEXT>",
 
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
  "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>",
16
  "<img>",
17
  "</img>",
18
  "<IMG_CONTEXT>",
tokenizer.json CHANGED
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 151646,
35
- "content": "<img>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 151647,
44
- "content": "</img>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
@@ -50,7 +50,7 @@
50
  },
51
  {
52
  "id": 151648,
53
- "content": "<IMG_CONTEXT>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 151649,
62
- "content": "<quad>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 151650,
71
- "content": "</quad>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 151651,
80
- "content": "<ref>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 151652,
89
- "content": "</ref>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 151653,
98
- "content": "<box>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 151654,
107
- "content": "</box>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 151655,
116
- "content": "[SEG]",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 151656,
125
- "content": "<p>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,12 +131,201 @@
131
  },
132
  {
133
  "id": 151657,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  "content": "</p>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
  "normalized": false,
139
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  }
141
  ],
142
  "normalizer": {
@@ -152137,6 +152326,7 @@
152137
  "o k",
152138
  "_ _",
152139
  "c l",
 
152140
  "Ġh e",
152141
  "ar d",
152142
  ") .",
@@ -152391,6 +152581,7 @@
152391
  "Ġp er",
152392
  "C ont",
152393
  "ing s",
 
152394
  "Ġd ata",
152395
  "Ġs a",
152396
  "e f",
@@ -152637,6 +152828,7 @@
152637
  "is e",
152638
  "m in",
152639
  "ĠĠĠĠ Ċ",
 
152640
  "eth od",
152641
  ". P",
152642
  "ut e",
@@ -152926,6 +153118,7 @@
152926
  "Ex ception",
152927
  "in put",
152928
  "Ġtr ans",
 
152929
  "ord er",
152930
  "B y",
152931
  "Ġa w",
@@ -153009,6 +153202,7 @@
153009
  "Ġb reak",
153010
  "Ġ );Ċ",
153011
  "re n",
 
153012
  "it t",
153013
  "Ġa p",
153014
  "ĉ c",
@@ -153998,6 +154192,7 @@
153998
  "http s",
153999
  "Ġd em",
154000
  "Ġe ar",
 
154001
  "Ġm atch",
154002
  "or ies",
154003
  "ac es",
@@ -154219,6 +154414,7 @@
154219
  "ER R",
154220
  "Ġ< =",
154221
  "at ely",
 
154222
  "u ction",
154223
  "ĠT e",
154224
  "Ġl ink",
@@ -154228,6 +154424,7 @@
154228
  "m e",
154229
  "Ġg iven",
154230
  "O ut",
 
154231
  "Ġbet ter",
154232
  "P age",
154233
  "Ġfe el",
@@ -156520,6 +156717,7 @@
156520
  "o ch",
156521
  "_ image",
156522
  "\\ t",
 
156523
  "( L",
156524
  "Ġindu stry",
156525
  "com ing",
@@ -156589,6 +156787,7 @@
156589
  "Ġcurrent ly",
156590
  "se c",
156591
  "Ġrelations hip",
 
156592
  "ĠM ap",
156593
  "as et",
156594
  "Ġparam eters",
@@ -157090,6 +157289,7 @@
157090
  "ast e",
157091
  "Ġpro file",
157092
  "Ġread y",
 
157093
  "ro te",
157094
  "Ġs ense",
157095
  "G ener",
@@ -157314,6 +157514,7 @@
157314
  "Ġp en",
157315
  ".b tn",
157316
  "ĠA S",
 
157317
  "Ġcho ice",
157318
  "ĠP age",
157319
  "_P RO",
@@ -157473,6 +157674,7 @@
157473
  "S cript",
157474
  "d at",
157475
  "Ġr ule",
 
157476
  "=\" /",
157477
  "S erial",
157478
  "Ġstart ing",
@@ -158049,6 +158251,7 @@
158049
  "ib ly",
158050
  ".e quals",
158051
  "Ġintern ational",
 
158052
  "oo th",
158053
  "W riter",
158054
  "i ate",
@@ -159568,6 +159771,7 @@
159568
  "ĠH ot",
159569
  "ĠInd ex",
159570
  "; &",
 
159571
  "ĠN or",
159572
  "ĠC ap",
159573
  "- (",
@@ -159722,6 +159926,7 @@
159722
  "ĠApp le",
159723
  "g in",
159724
  "Re quired",
 
159725
  "land s",
159726
  "Ġs qu",
159727
  "Ġfact or",
@@ -160257,6 +160462,7 @@
160257
  "i ro",
160258
  "Ġrem ote",
160259
  "ĠI T",
 
160260
  "Ġred istrib",
160261
  "ra z",
160262
  "ĠS ince",
@@ -161494,6 +161700,7 @@
161494
  "ĠJ ournal",
161495
  "r outer",
161496
  "Ġmys qli",
 
161497
  ") \"",
161498
  "-x s",
161499
  "let s",
@@ -163582,6 +163789,7 @@
163582
  "-d anger",
163583
  "Ġroom s",
163584
  "con v",
 
163585
  ". op",
163586
  "ĠA rea",
163587
  "_S C",
@@ -164620,6 +164828,7 @@
164620
  "Ġfact s",
164621
  "Ġun t",
164622
  ".in stance",
 
164623
  "- end",
164624
  "ĠJO IN",
164625
  "ĠH en",
@@ -164905,6 +165114,7 @@
164905
  "ĠJew ish",
164906
  "Ġre covery",
164907
  "Ġstand s",
 
164908
  "Ġafter noon",
164909
  "ĠArt icle",
164910
  "_ att",
@@ -165440,6 +165650,7 @@
165440
  "err a",
165441
  "Ċ ĠĠĊ",
165442
  "utor ial",
 
165443
  "p ay",
165444
  "char ge",
165445
  "ĠI re",
@@ -165924,6 +166135,7 @@
165924
  "Ġstre ets",
165925
  "_M SG",
165926
  "New s",
 
165927
  ": /",
165928
  "Ġcut ting",
165929
  "x B",
@@ -167739,6 +167951,7 @@
167739
  "gener ated",
167740
  "Ġad mitted",
167741
  "Ġp ussy",
 
167742
  "Ġsw im",
167743
  "un ion",
167744
  "N a",
@@ -167790,6 +168003,7 @@
167790
  "vel y",
167791
  "} );čĊ",
167792
  "_ ENT",
 
167793
  "art icles",
167794
  "ĠSou thern",
167795
  "Ġstrong er",
@@ -169002,6 +169216,7 @@
169002
  "Ġcharacter istics",
169003
  "D one",
169004
  "el n",
 
169005
  "PO S",
169006
  "Ġd ensity",
169007
  "ĠPl atform",
@@ -170006,6 +170221,7 @@
170006
  "d og",
170007
  "Ġclick ing",
170008
  "), ĊĊ",
 
170009
  "Oper ator",
170010
  "Ġc iv",
170011
  "Ġm erg",
@@ -171417,6 +171633,7 @@
171417
  "Ġdemonstr ate",
171418
  "ĠHow ard",
171419
  "D rop",
 
171420
  "Ġinv oke",
171421
  "ĠB ridge",
171422
  "end en",
@@ -174158,6 +174375,7 @@
174158
  "ĠP ub",
174159
  "Ġâ Ķ",
174160
  "c ion",
 
174161
  "II I",
174162
  "Tag Name",
174163
  "Ġam id",
@@ -175207,6 +175425,7 @@
175207
  ".m aterial",
175208
  "ĠD ue",
175209
  "ĠP el",
 
175210
  "Ġindepend ence",
175211
  "ex us",
175212
  "Sh adow",
@@ -178029,6 +178248,7 @@
178029
  "ĠFant asy",
178030
  "st ory",
178031
  "Ġm ême",
 
178032
  "_s lice",
178033
  "olt age",
178034
  "H ar",
@@ -178381,6 +178601,7 @@
178381
  "am ination",
178382
  "[ @\"",
178383
  "Ġm uj",
 
178384
  "First OrDefault",
178385
  "then Return",
178386
  "C he",
@@ -178552,6 +178773,7 @@
178552
  "ather ine",
178553
  "( dest",
178554
  "az ed",
 
178555
  "sem bl",
178556
  ", M",
178557
  "ob y",
@@ -180388,6 +180610,7 @@
180388
  "ib o",
180389
  "Ġlo yal",
180390
  "Ġuse less",
 
180391
  "ĠUlt imate",
180392
  "C ome",
180393
  "g el",
@@ -180595,6 +180818,7 @@
180595
  "ation ally",
180596
  "ĠMe eting",
180597
  "è¯ ¯",
 
180598
  "Ġrout ing",
180599
  ".f ocus",
180600
  "ĠYou th",
@@ -181391,6 +181615,7 @@
181391
  ".prot ocol",
181392
  "AF E",
181393
  "Ġtext ures",
 
181394
  "umb ai",
181395
  ".st ats",
181396
  "ĠG E",
@@ -183577,6 +183802,7 @@
183577
  ".F ull",
183578
  ". undefined",
183579
  "ĠSequ elize",
 
183580
  "Ġeduc ated",
183581
  "_O VER",
183582
  "åº ı",
@@ -185188,6 +185414,7 @@
185188
  "dd s",
185189
  "Ġdepos its",
185190
  "ĉd river",
 
185191
  "pr ising",
185192
  "print ln",
185193
  "Ġpres enter",
@@ -192020,6 +192247,7 @@
192020
  "ac ific",
192021
  ".v olume",
192022
  "Ġmir rors",
 
192023
  "Ġviol ate",
192024
  "ars ers",
192025
  "Ġsoc io",
@@ -192877,6 +193105,7 @@
192877
  "ar de",
192878
  "Ġfier ce",
192879
  "lic ted",
 
192880
  "Ġbreak through",
192881
  "ĠE rik",
192882
  "Ġb ride",
@@ -195619,6 +195848,7 @@
195619
  "rown ed",
195620
  "_m ed",
195621
  "ĉ date",
 
195622
  "Ġcost umes",
195623
  "ĠRe quires",
195624
  "aff le",
@@ -197683,6 +197913,7 @@
197683
  "Ġmock ed",
197684
  "ĠT ory",
197685
  "Ġ\") \";Ċ",
 
197686
  "Ġli ed",
197687
  "Ġs vc",
197688
  "_g ui",
@@ -199607,10 +199838,12 @@
199607
  "d aily",
199608
  "ĠC oding",
199609
  "( destination",
 
199610
  "uj Äħ",
199611
  "Ġemerg ence",
199612
  "_p ara",
199613
  "_IN CLUDE",
 
199614
  "Ġrecogn izing",
199615
  "Ġf ug",
199616
  "\"} },Ċ",
@@ -200026,6 +200259,7 @@
200026
  "k p",
200027
  "IT ES",
200028
  "Ġ ################################################################",
 
200029
  "/ Desktop",
200030
  "ĉgl m",
200031
  "Ġz inc",
@@ -202051,6 +202285,7 @@
202051
  "Ġdee pest",
202052
  "w k",
202053
  "ĠNo ise",
 
202054
  "Ġpr éc",
202055
  "ot le",
202056
  "ÑĤ е",
@@ -203850,6 +204085,7 @@
203850
  "-effect ive",
203851
  "Ġsk u",
203852
  "ed ly",
 
203853
  "ĠH olly",
203854
  "ĠFL ASH",
203855
  "/ TR",
@@ -207068,6 +207304,7 @@
207068
  "_s b",
207069
  "om ens",
207070
  "ĠExec utes",
 
207071
  "TT Y",
207072
  "ĠValue Type",
207073
  "); */Ċ",
@@ -207467,6 +207704,7 @@
207467
  "E OS",
207468
  "H al",
207469
  "Ġtrust worthy",
 
207470
  ".EX TRA",
207471
  "Ġman o",
207472
  "is icing",
@@ -212301,6 +212539,7 @@
212301
  "/ place",
212302
  "Ġhol istic",
212303
  ": t",
 
212304
  "Ġb oto",
212305
  "Ġnause a",
212306
  "ĠSh ooting",
@@ -212437,6 +212676,7 @@
212437
  "C ause",
212438
  "at ypes",
212439
  "man ufacturer",
 
212440
  "Ġsp or",
212441
  "es on",
212442
  "Ġpun ched",
@@ -214046,6 +214286,7 @@
214046
  "S creenshot",
214047
  "esthes ia",
214048
  "Ġwalk er",
 
214049
  "co ordinate",
214050
  "_ interest",
214051
  "Ġhelp less",
@@ -215256,6 +215497,7 @@
215256
  "Field Type",
215257
  "ok able",
215258
  "ĠRT L",
 
215259
  "Ġ% {",
215260
  "Ġar ist",
215261
  ".Get Mapping",
@@ -217578,6 +217820,7 @@
217578
  "so on",
217579
  "Ġdis fr",
217580
  "ĉ Vec",
 
217581
  ".s chool",
217582
  "Ġbl inds",
217583
  "Ġac ab",
@@ -218849,6 +219092,7 @@
218849
  "ĉex ec",
218850
  "EN AME",
218851
  "_ letters",
 
218852
  "ĠC s",
218853
  "'] ==\"",
218854
  "Ġ\" ')",
@@ -219050,6 +219294,7 @@
219050
  "Ġtem pl",
219051
  "ĠExp ense",
219052
  "e ight",
 
219053
  "z ones",
219054
  ".p arts",
219055
  "at rice",
@@ -219384,6 +219629,7 @@
219384
  "i ard",
219385
  "Theme Provider",
219386
  "Ġevent Data",
 
219387
  ".get Url",
219388
  "Ġtool box",
219389
  "Ġover riding",
@@ -220228,6 +220474,7 @@
220228
  "ĠTh eta",
220229
  "_inter p",
220230
  "R aster",
 
220231
  ", obj",
220232
  "Ġtweet ing",
220233
  "_G PU",
@@ -220575,6 +220822,7 @@
220575
  "Ġè¾ĵ åħ¥",
220576
  "Ġintimid ation",
220577
  "end ale",
 
220578
  "Ġinsight ful",
220579
  "Ġs ands",
220580
  "Ġphotograph ic",
@@ -220980,6 +221228,7 @@
220980
  "Dep loy",
220981
  ".Con tract",
220982
  "- bo",
 
220983
  "Ġinter ception",
220984
  "Ġis bn",
220985
  "Ġman ners",
@@ -221462,6 +221711,7 @@
221462
  "\\ \\\"",
221463
  "- job",
221464
  "Ġsevent y",
 
221465
  "ĠMan or",
221466
  "Ġdown right",
221467
  "Ġtime frame",
@@ -223267,6 +223517,7 @@
223267
  "/m essages",
223268
  "Ġof Type",
223269
  "ĉs wap",
 
223270
  "ĠTur ks",
223271
  "N ES",
223272
  "Ġprogress ively",
@@ -223514,6 +223765,7 @@
223514
  "_T itle",
223515
  "(G tk",
223516
  "Ġc elle",
 
223517
  "ĠJ oomla",
223518
  "\"> //",
223519
  "Month ly",
@@ -227019,6 +227271,7 @@
227019
  "ĠRevel ation",
227020
  "Ġrap ide",
227021
  "p unk",
 
227022
  "Object Id",
227023
  "abin et",
227024
  "extr acomment",
@@ -227423,6 +227676,7 @@
227423
  "(c d",
227424
  "Ġun zip",
227425
  "Ġglam orous",
 
227426
  "Ġn aw",
227427
  "Ġmin ib",
227428
  "ĠBr an",
@@ -227705,6 +227959,7 @@
227705
  "Ġragaz za",
227706
  "/ tag",
227707
  "Ġirres ist",
 
227708
  "****** *čĊ",
227709
  "Ġrestr ained",
227710
  "Ġch iropr",
@@ -227842,6 +228097,7 @@
227842
  "Ġgr âce",
227843
  "ĠH elsinki",
227844
  "G ro",
 
227845
  "ìĭ Ŀ",
227846
  "Ġsou ha",
227847
  "ĠInd ie",
@@ -228634,6 +228890,7 @@
228634
  ".M ESSAGE",
228635
  "(de g",
228636
  "å¿ Ĺ",
 
228637
  "Ġ\"\" ),",
228638
  "kl är",
228639
  "\\M ail",
@@ -228848,6 +229105,7 @@
228848
  "/ grid",
228849
  "Ġfil thy",
228850
  ".e v",
 
228851
  "Ġs are",
228852
  "Ġso aking",
228853
  "ĠReg ions",
@@ -229088,6 +229346,7 @@
229088
  "` ;",
229089
  "ãĥ ĭ",
229090
  "cent e",
 
229091
  "Ġlex ical",
229092
  "ĠB RO",
229093
  "Ġr ulings",
@@ -229196,6 +229455,7 @@
229196
  "ĉ props",
229197
  "Ġrot ten",
229198
  "Re jected",
 
229199
  ". ua",
229200
  "ĠAm nesty",
229201
  "Ġpenn ed",
@@ -229538,6 +229798,7 @@
229538
  "cul os",
229539
  "_person al",
229540
  "Ġanaly tic",
 
229541
  "_mem cpy",
229542
  "(List Node",
229543
  "_T ag",
@@ -230455,6 +230716,7 @@
230455
  "ìŀħ ëĭĪëĭ¤",
230456
  "Ġsid eline",
230457
  ".Array Adapter",
 
230458
  "ĠSy rians",
230459
  "ĠAtt endance",
230460
  "-es que",
@@ -230947,6 +231209,7 @@
230947
  "ALL ERY",
230948
  "c j",
230949
  "x AD",
 
230950
  "Ġitalian i",
230951
  "| #",
230952
  "Ġreg enerate",
@@ -231915,6 +232178,7 @@
231915
  "_C ategory",
231916
  "ĠBT N",
231917
  "ĠDar th",
 
231918
  "eth nic",
231919
  "arch itecture",
231920
  "ĠCou pe",
@@ -232989,6 +233253,7 @@
232989
  "Ġcooper ating",
232990
  "ung i",
232991
  "Ar izona",
 
232992
  "-ex pression",
232993
  ".min utes",
232994
  "Ġpref ixed",
@@ -233520,6 +233785,7 @@
233520
  "ĠMom ents",
233521
  "enu ous",
233522
  "IFIC ATE",
 
233523
  "æĸĩ ç«ł",
233524
  "á»į c",
233525
  "orm sg",
@@ -233567,6 +233833,7 @@
233567
  "ĠIS IL",
233568
  "Ġco op",
233569
  "h ores",
 
233570
  "Ġcomp el",
233571
  "(s kip",
233572
  "éĺ ³",
@@ -233791,6 +234058,7 @@
233791
  "Ġ-*- čĊ",
233792
  "if es",
233793
  "ĠM ansion",
 
233794
  "C ancellation",
233795
  "Ġnear ing",
233796
  "Ġl angu",
@@ -234071,6 +234339,7 @@
234071
  "Ġw reak",
234072
  "Mar vel",
234073
  "/s l",
 
234074
  "Ġmov able",
234075
  "Ñĥ й",
234076
  "ĠDr inking",
@@ -235646,6 +235915,7 @@
235646
  "ĠPal o",
235647
  "ĠSuz anne",
235648
  "_m appings",
 
235649
  "ĠOccup ational",
235650
  "_b uckets",
235651
  "go als",
@@ -235790,6 +236060,7 @@
235790
  "-w atch",
235791
  "ĠHosp itals",
235792
  "} while",
 
235793
  "á» £",
235794
  "Ġakt ual",
235795
  "Ġkil ograms",
@@ -235911,6 +236182,7 @@
235911
  "Ġcurrent Position",
235912
  "ĠCaucas ian",
235913
  "$ img",
 
235914
  "Ġse an",
235915
  "M ess",
235916
  "*= *=",
@@ -236855,6 +237127,7 @@
236855
  "ĠR udd",
236856
  "ĠPres byterian",
236857
  "und ler",
 
236858
  "_l ahir",
236859
  "() ==\"",
236860
  "Access ibility",
@@ -239334,6 +239607,7 @@
239334
  "_AT OMIC",
239335
  "Sub Menu",
239336
  "_com press",
 
239337
  "Dr v",
239338
  ".push Button",
239339
  "Ġsuit case",
@@ -239392,6 +239666,7 @@
239392
  "Ġe books",
239393
  ") size",
239394
  "Ġspecial ised",
 
239395
  "Ġmich ael",
239396
  "ĠSTD OUT",
239397
  "ĠP ell",
@@ -239570,6 +239845,7 @@
239570
  "ĠEisen hower",
239571
  "File d",
239572
  "Ġhe bt",
 
239573
  "create QueryBuilder",
239574
  "æľī æķĪ",
239575
  "v anced",
@@ -239585,6 +239861,7 @@
239585
  "ol vable",
239586
  "Ġal as",
239587
  "(sp ell",
 
239588
  "Ġdef amation",
239589
  "( Arg",
239590
  "Ġuser Details",
@@ -241522,6 +241799,7 @@
241522
  "Ġsup p",
241523
  "pe on",
241524
  "v rier",
 
241525
  "Ġtrot z",
241526
  "Ġmel tdown",
241527
  "ark ers",
@@ -242213,6 +242491,7 @@
242213
  "ser ter",
242214
  "Ġstore front",
242215
  "_ ends",
 
242216
  "ĉg it",
242217
  "D SP",
242218
  "CH AIN",
@@ -243735,6 +244014,7 @@
243735
  "ĠST ILL",
243736
  "Qualified Name",
243737
  ". RES",
 
243738
  ".w riteln",
243739
  "ĠImmutable List",
243740
  "ĠTh umb",
@@ -244716,6 +244996,7 @@
244716
  "in ne",
244717
  "ĠCONST ANTS",
244718
  "_P rice",
 
244719
  "Ġar sch",
244720
  "ĠNS AttributedString",
244721
  "ĠFile Type",
@@ -245662,6 +245943,7 @@
245662
  "_P ICTURE",
245663
  "_OR IENTATION",
245664
  "ĠO PP",
 
245665
  "áf ico",
245666
  ".h istogram",
245667
  "ĠB enny",
@@ -245932,6 +246214,7 @@
245932
  "ew n",
245933
  "Ġpop ulous",
245934
  "ĠSh ed",
 
245935
  "ĠA lo",
245936
  "Device Info",
245937
  "(IN VOKE",
@@ -249322,6 +249605,7 @@
249322
  "ĠD ISTRIBUT",
249323
  "é ments",
249324
  "ĠValid ates",
 
249325
  "Ġ| /",
249326
  "Ġes l",
249327
  "Ġré seau",
@@ -249341,6 +249625,7 @@
249341
  "ime i",
249342
  ".get Bounds",
249343
  ".Mouse EventHandler",
 
249344
  "Generic Type",
249345
  "/c ms",
249346
  "Ġturn o",
 
32
  },
33
  {
34
  "id": 151646,
35
+ "content": "<|object_ref_start|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 151647,
44
+ "content": "<|object_ref_end|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
50
  },
51
  {
52
  "id": 151648,
53
+ "content": "<|box_start|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
 
59
  },
60
  {
61
  "id": 151649,
62
+ "content": "<|box_end|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 151650,
71
+ "content": "<|quad_start|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 151651,
80
+ "content": "<|quad_end|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 151652,
89
+ "content": "<|vision_start|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 151653,
98
+ "content": "<|vision_end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 151654,
107
+ "content": "<|vision_pad|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 151655,
116
+ "content": "<|image_pad|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 151656,
125
+ "content": "<|video_pad|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 151657,
134
+ "content": "<tool_call>",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": false
140
+ },
141
+ {
142
+ "id": 151658,
143
+ "content": "</tool_call>",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": false
149
+ },
150
+ {
151
+ "id": 151659,
152
+ "content": "<|fim_prefix|>",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": false
158
+ },
159
+ {
160
+ "id": 151660,
161
+ "content": "<|fim_middle|>",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": false
167
+ },
168
+ {
169
+ "id": 151661,
170
+ "content": "<|fim_suffix|>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": false
176
+ },
177
+ {
178
+ "id": 151662,
179
+ "content": "<|fim_pad|>",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": false
185
+ },
186
+ {
187
+ "id": 151663,
188
+ "content": "<|repo_name|>",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": false,
193
+ "special": false
194
+ },
195
+ {
196
+ "id": 151664,
197
+ "content": "<|file_sep|>",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": false
203
+ },
204
+ {
205
+ "id": 151665,
206
+ "content": "<img>",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 151666,
215
+ "content": "</img>",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 151667,
224
+ "content": "<IMG_CONTEXT>",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 151668,
233
+ "content": "<quad>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 151669,
242
+ "content": "</quad>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 151670,
251
+ "content": "<ref>",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 151671,
260
+ "content": "</ref>",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 151672,
269
+ "content": "<box>",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 151673,
278
+ "content": "</box>",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 151674,
287
+ "content": "[SEG]",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 151675,
296
+ "content": "<p>",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 151676,
305
  "content": "</p>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
  "normalized": false,
310
  "special": true
311
+ },
312
+ {
313
+ "id": 151677,
314
+ "content": "<vp>",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
+ },
321
+ {
322
+ "id": 151678,
323
+ "content": "</vp>",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
  }
330
  ],
331
  "normalizer": {
 
152326
  "o k",
152327
  "_ _",
152328
  "c l",
152329
+ "# #",
152330
  "Ġh e",
152331
  "ar d",
152332
  ") .",
 
152581
  "Ġp er",
152582
  "C ont",
152583
  "ing s",
152584
+ "## ##",
152585
  "Ġd ata",
152586
  "Ġs a",
152587
  "e f",
 
152828
  "is e",
152829
  "m in",
152830
  "ĠĠĠĠ Ċ",
152831
+ "# include",
152832
  "eth od",
152833
  ". P",
152834
  "ut e",
 
153118
  "Ex ception",
153119
  "in put",
153120
  "Ġtr ans",
153121
+ "#### ####",
153122
  "ord er",
153123
  "B y",
153124
  "Ġa w",
 
153202
  "Ġb reak",
153203
  "Ġ );Ċ",
153204
  "re n",
153205
+ "# define",
153206
  "it t",
153207
  "Ġa p",
153208
  "ĉ c",
 
154192
  "http s",
154193
  "Ġd em",
154194
  "Ġe ar",
154195
+ "######## ########",
154196
  "Ġm atch",
154197
  "or ies",
154198
  "ac es",
 
154414
  "ER R",
154415
  "Ġ< =",
154416
  "at ely",
154417
+ "# if",
154418
  "u ction",
154419
  "ĠT e",
154420
  "Ġl ink",
 
154424
  "m e",
154425
  "Ġg iven",
154426
  "O ut",
154427
+ "# endif",
154428
  "Ġbet ter",
154429
  "P age",
154430
  "Ġfe el",
 
156717
  "o ch",
156718
  "_ image",
156719
  "\\ t",
156720
+ "# Ċ",
156721
  "( L",
156722
  "Ġindu stry",
156723
  "com ing",
 
156787
  "Ġcurrent ly",
156788
  "se c",
156789
  "Ġrelations hip",
156790
+ "################ ################",
156791
  "ĠM ap",
156792
  "as et",
156793
  "Ġparam eters",
 
157289
  "ast e",
157290
  "Ġpro file",
157291
  "Ġread y",
157292
+ "#if ndef",
157293
  "ro te",
157294
  "Ġs ense",
157295
  "G ener",
 
157514
  "Ġp en",
157515
  ".b tn",
157516
  "ĠA S",
157517
+ "#if def",
157518
  "Ġcho ice",
157519
  "ĠP age",
157520
  "_P RO",
 
157674
  "S cript",
157675
  "d at",
157676
  "Ġr ule",
157677
+ "# import",
157678
  "=\" /",
157679
  "S erial",
157680
  "Ġstart ing",
 
158251
  "ib ly",
158252
  ".e quals",
158253
  "Ġintern ational",
158254
+ "# pragma",
158255
  "oo th",
158256
  "W riter",
158257
  "i ate",
 
159771
  "ĠH ot",
159772
  "ĠInd ex",
159773
  "; &",
159774
+ "# !",
159775
  "ĠN or",
159776
  "ĠC ap",
159777
  "- (",
 
159926
  "ĠApp le",
159927
  "g in",
159928
  "Re quired",
159929
+ "# +",
159930
  "land s",
159931
  "Ġs qu",
159932
  "Ġfact or",
 
160462
  "i ro",
160463
  "Ġrem ote",
160464
  "ĠI T",
160465
+ "#! /",
160466
  "Ġred istrib",
160467
  "ra z",
160468
  "ĠS ince",
 
161700
  "ĠJ ournal",
161701
  "r outer",
161702
  "Ġmys qli",
161703
+ "# else",
161704
  ") \"",
161705
  "-x s",
161706
  "let s",
 
163789
  "-d anger",
163790
  "Ġroom s",
163791
  "con v",
163792
+ "# {",
163793
  ". op",
163794
  "ĠA rea",
163795
  "_S C",
 
164828
  "Ġfact s",
164829
  "Ġun t",
164830
  ".in stance",
164831
+ "################################ ################################",
164832
  "- end",
164833
  "ĠJO IN",
164834
  "ĠH en",
 
165114
  "ĠJew ish",
165115
  "Ġre covery",
165116
  "Ġstand s",
165117
+ "# [",
165118
  "Ġafter noon",
165119
  "ĠArt icle",
165120
  "_ att",
 
165650
  "err a",
165651
  "Ċ ĠĠĊ",
165652
  "utor ial",
165653
+ "# a",
165654
  "p ay",
165655
  "char ge",
165656
  "ĠI re",
 
166135
  "Ġstre ets",
166136
  "_M SG",
166137
  "New s",
166138
+ "## #",
166139
  ": /",
166140
  "Ġcut ting",
166141
  "x B",
 
167951
  "gener ated",
167952
  "Ġad mitted",
167953
  "Ġp ussy",
167954
+ "# w",
167955
  "Ġsw im",
167956
  "un ion",
167957
  "N a",
 
168003
  "vel y",
168004
  "} );čĊ",
168005
  "_ ENT",
168006
+ "#+ #+",
168007
  "art icles",
168008
  "ĠSou thern",
168009
  "Ġstrong er",
 
169216
  "Ġcharacter istics",
169217
  "D one",
169218
  "el n",
169219
+ "######## ####",
169220
  "PO S",
169221
  "Ġd ensity",
169222
  "ĠPl atform",
 
170221
  "d og",
170222
  "Ġclick ing",
170223
  "), ĊĊ",
170224
+ "# line",
170225
  "Oper ator",
170226
  "Ġc iv",
170227
  "Ġm erg",
 
171633
  "Ġdemonstr ate",
171634
  "ĠHow ard",
171635
  "D rop",
171636
+ "# undef",
171637
  "Ġinv oke",
171638
  "ĠB ridge",
171639
  "end en",
 
174375
  "ĠP ub",
174376
  "Ġâ Ķ",
174377
  "c ion",
174378
+ "## Ċ",
174379
  "II I",
174380
  "Tag Name",
174381
  "Ġam id",
 
175425
  ".m aterial",
175426
  "ĠD ue",
175427
  "ĠP el",
175428
+ "# print",
175429
  "Ġindepend ence",
175430
  "ex us",
175431
  "Sh adow",
 
178248
  "ĠFant asy",
178249
  "st ory",
178250
  "Ġm ême",
178251
+ "# ĊĊ",
178252
  "_s lice",
178253
  "olt age",
178254
  "H ar",
 
178601
  "am ination",
178602
  "[ @\"",
178603
  "Ġm uj",
178604
+ "## #Ċ",
178605
  "First OrDefault",
178606
  "then Return",
178607
  "C he",
 
178773
  "ather ine",
178774
  "( dest",
178775
  "az ed",
178776
+ "# endregion",
178777
  "sem bl",
178778
  ", M",
178779
  "ob y",
 
180610
  "ib o",
180611
  "Ġlo yal",
180612
  "Ġuse less",
180613
+ "# elif",
180614
  "ĠUlt imate",
180615
  "C ome",
180616
  "g el",
 
180818
  "ation ally",
180819
  "ĠMe eting",
180820
  "è¯ ¯",
180821
+ "# region",
180822
  "Ġrout ing",
180823
  ".f ocus",
180824
  "ĠYou th",
 
181615
  ".prot ocol",
181616
  "AF E",
181617
  "Ġtext ures",
181618
+ "################################ ################",
181619
  "umb ai",
181620
  ".st ats",
181621
  "ĠG E",
 
183802
  ".F ull",
183803
  ". undefined",
183804
  "ĠSequ elize",
183805
+ "################################################################ ############",
183806
  "Ġeduc ated",
183807
  "_O VER",
183808
  "åº ı",
 
185414
  "dd s",
185415
  "Ġdepos its",
185416
  "ĉd river",
185417
+ "# ga",
185418
  "pr ising",
185419
  "print ln",
185420
  "Ġpres enter",
 
192247
  "ac ific",
192248
  ".v olume",
192249
  "Ġmir rors",
192250
+ "################################################################ ################",
192251
  "Ġviol ate",
192252
  "ars ers",
192253
  "Ġsoc io",
 
193105
  "ar de",
193106
  "Ġfier ce",
193107
  "lic ted",
193108
+ "# čĊ",
193109
  "Ġbreak through",
193110
  "ĠE rik",
193111
  "Ġb ride",
 
195848
  "rown ed",
195849
  "_m ed",
195850
  "ĉ date",
195851
+ "# __",
195852
  "Ġcost umes",
195853
  "ĠRe quires",
195854
  "aff le",
 
197913
  "Ġmock ed",
197914
  "ĠT ory",
197915
  "Ġ\") \";Ċ",
197916
+ "# g",
197917
  "Ġli ed",
197918
  "Ġs vc",
197919
  "_g ui",
 
199838
  "d aily",
199839
  "ĠC oding",
199840
  "( destination",
199841
+ "# $",
199842
  "uj Äħ",
199843
  "Ġemerg ence",
199844
  "_p ara",
199845
  "_IN CLUDE",
199846
+ "# :",
199847
  "Ġrecogn izing",
199848
  "Ġf ug",
199849
  "\"} },Ċ",
 
200259
  "k p",
200260
  "IT ES",
200261
  "Ġ ################################################################",
200262
+ "# get",
200263
  "/ Desktop",
200264
  "ĉgl m",
200265
  "Ġz inc",
 
202285
  "Ġdee pest",
202286
  "w k",
202287
  "ĠNo ise",
202288
+ "### ĊĊ",
202289
  "Ġpr éc",
202290
  "ot le",
202291
  "ÑĤ е",
 
204085
  "-effect ive",
204086
  "Ġsk u",
204087
  "ed ly",
204088
+ "## ĊĊ",
204089
  "ĠH olly",
204090
  "ĠFL ASH",
204091
  "/ TR",
 
207304
  "_s b",
207305
  "om ens",
207306
  "ĠExec utes",
207307
+ "# \"",
207308
  "TT Y",
207309
  "ĠValue Type",
207310
  "); */Ċ",
 
207704
  "E OS",
207705
  "H al",
207706
  "Ġtrust worthy",
207707
+ "# =",
207708
  ".EX TRA",
207709
  "Ġman o",
207710
  "is icing",
 
212539
  "/ place",
212540
  "Ġhol istic",
212541
  ": t",
212542
+ "# ,",
212543
  "Ġb oto",
212544
  "Ġnause a",
212545
  "ĠSh ooting",
 
212676
  "C ause",
212677
  "at ypes",
212678
  "man ufacturer",
212679
+ "# ----------------------------------------------------------------------------",
212680
  "Ġsp or",
212681
  "es on",
212682
  "Ġpun ched",
 
214286
  "S creenshot",
214287
  "esthes ia",
214288
  "Ġwalk er",
214289
+ "# from",
214290
  "co ordinate",
214291
  "_ interest",
214292
  "Ġhelp less",
 
215497
  "Field Type",
215498
  "ok able",
215499
  "ĠRT L",
215500
+ "# index",
215501
  "Ġ% {",
215502
  "Ġar ist",
215503
  ".Get Mapping",
 
217820
  "so on",
217821
  "Ġdis fr",
217822
  "ĉ Vec",
217823
+ "## _",
217824
  ".s chool",
217825
  "Ġbl inds",
217826
  "Ġac ab",
 
219092
  "ĉex ec",
219093
  "EN AME",
219094
  "_ letters",
219095
+ "#### #",
219096
  "ĠC s",
219097
  "'] ==\"",
219098
  "Ġ\" ')",
 
219294
  "Ġtem pl",
219295
  "ĠExp ense",
219296
  "e ight",
219297
+ "# SBATCH",
219298
  "z ones",
219299
  ".p arts",
219300
  "at rice",
 
219629
  "i ard",
219630
  "Theme Provider",
219631
  "Ġevent Data",
219632
+ "# ad",
219633
  ".get Url",
219634
  "Ġtool box",
219635
  "Ġover riding",
 
220474
  "ĠTh eta",
220475
  "_inter p",
220476
  "R aster",
220477
+ "# error",
220478
  ", obj",
220479
  "Ġtweet ing",
220480
  "_G PU",
 
220822
  "Ġè¾ĵ åħ¥",
220823
  "Ġintimid ation",
220824
  "end ale",
220825
+ "################################################################ ########",
220826
  "Ġinsight ful",
220827
  "Ġs ands",
220828
  "Ġphotograph ic",
 
221228
  "Dep loy",
221229
  ".Con tract",
221230
  "- bo",
221231
+ "# /",
221232
  "Ġinter ception",
221233
  "Ġis bn",
221234
  "Ġman ners",
 
221711
  "\\ \\\"",
221712
  "- job",
221713
  "Ġsevent y",
221714
+ "#### Ċ",
221715
  "ĠMan or",
221716
  "Ġdown right",
221717
  "Ġtime frame",
 
223517
  "/m essages",
223518
  "Ġof Type",
223519
  "ĉs wap",
223520
+ "# plt",
223521
  "ĠTur ks",
223522
  "N ES",
223523
  "Ġprogress ively",
 
223765
  "_T itle",
223766
  "(G tk",
223767
  "Ġc elle",
223768
+ "# ================================================================",
223769
  "ĠJ oomla",
223770
  "\"> //",
223771
  "Month ly",
 
227271
  "ĠRevel ation",
227272
  "Ġrap ide",
227273
  "p unk",
227274
+ "# ----------------------------------------------------------------",
227275
  "Object Id",
227276
  "abin et",
227277
  "extr acomment",
 
227676
  "(c d",
227677
  "Ġun zip",
227678
  "Ġglam orous",
227679
+ "# \",",
227680
  "Ġn aw",
227681
  "Ġmin ib",
227682
  "ĠBr an",
 
227959
  "Ġragaz za",
227960
  "/ tag",
227961
  "Ġirres ist",
227962
+ "# End",
227963
  "****** *čĊ",
227964
  "Ġrestr ained",
227965
  "Ġch iropr",
 
228097
  "Ġgr âce",
228098
  "ĠH elsinki",
228099
  "G ro",
228100
+ "# af",
228101
  "ìĭ Ŀ",
228102
  "Ġsou ha",
228103
  "ĠInd ie",
 
228890
  ".M ESSAGE",
228891
  "(de g",
228892
  "å¿ Ĺ",
228893
+ "#### ##",
228894
  "Ġ\"\" ),",
228895
  "kl är",
228896
  "\\M ail",
 
229105
  "/ grid",
229106
  "Ġfil thy",
229107
  ".e v",
229108
+ "#### #Ċ",
229109
  "Ġs are",
229110
  "Ġso aking",
229111
  "ĠReg ions",
 
229346
  "` ;",
229347
  "ãĥ ĭ",
229348
  "cent e",
229349
+ "# ab",
229350
  "Ġlex ical",
229351
  "ĠB RO",
229352
  "Ġr ulings",
 
229455
  "ĉ props",
229456
  "Ġrot ten",
229457
  "Re jected",
229458
+ "# ac",
229459
  ". ua",
229460
  "ĠAm nesty",
229461
  "Ġpenn ed",
 
229798
  "cul os",
229799
  "_person al",
229800
  "Ġanaly tic",
229801
+ "# create",
229802
  "_mem cpy",
229803
  "(List Node",
229804
  "_T ag",
 
230716
  "ìŀħ ëĭĪëĭ¤",
230717
  "Ġsid eline",
230718
  ".Array Adapter",
230719
+ "#### ##Ċ",
230720
  "ĠSy rians",
230721
  "ĠAtt endance",
230722
  "-es que",
 
231209
  "ALL ERY",
231210
  "c j",
231211
  "x AD",
231212
+ "############################################################################ ###Ċ",
231213
  "Ġitalian i",
231214
  "| #",
231215
  "Ġreg enerate",
 
232178
  "_C ategory",
232179
  "ĠBT N",
232180
  "ĠDar th",
232181
+ "# for",
232182
  "eth nic",
232183
  "arch itecture",
232184
  "ĠCou pe",
 
233253
  "Ġcooper ating",
233254
  "ung i",
233255
  "Ar izona",
233256
+ "# echo",
233257
  "-ex pression",
233258
  ".min utes",
233259
  "Ġpref ixed",
 
233785
  "ĠMom ents",
233786
  "enu ous",
233787
  "IFIC ATE",
233788
+ "#### ###Ċ",
233789
  "æĸĩ ç«ł",
233790
  "á»į c",
233791
  "orm sg",
 
233833
  "ĠIS IL",
233834
  "Ġco op",
233835
  "h ores",
233836
+ "# @",
233837
  "Ġcomp el",
233838
  "(s kip",
233839
  "éĺ ³",
 
234058
  "Ġ-*- čĊ",
234059
  "if es",
234060
  "ĠM ansion",
234061
+ "# Region",
234062
  "C ancellation",
234063
  "Ġnear ing",
234064
  "Ġl angu",
 
234339
  "Ġw reak",
234340
  "Mar vel",
234341
  "/s l",
234342
+ "################################ ########",
234343
  "Ġmov able",
234344
  "Ñĥ й",
234345
  "ĠDr inking",
 
235915
  "ĠPal o",
235916
  "ĠSuz anne",
235917
  "_m appings",
235918
+ "#{ @",
235919
  "ĠOccup ational",
235920
  "_b uckets",
235921
  "go als",
 
236060
  "-w atch",
236061
  "ĠHosp itals",
236062
  "} while",
236063
+ "################ ########",
236064
  "á» £",
236065
  "Ġakt ual",
236066
  "Ġkil ograms",
 
236182
  "Ġcurrent Position",
236183
  "ĠCaucas ian",
236184
  "$ img",
236185
+ "# aa",
236186
  "Ġse an",
236187
  "M ess",
236188
  "*= *=",
 
237127
  "ĠR udd",
237128
  "ĠPres byterian",
237129
  "und ler",
237130
+ "#! [",
237131
  "_l ahir",
237132
  "() ==\"",
237133
  "Access ibility",
 
239607
  "_AT OMIC",
239608
  "Sub Menu",
239609
  "_com press",
239610
+ "# .",
239611
  "Dr v",
239612
  ".push Button",
239613
  "Ġsuit case",
 
239666
  "Ġe books",
239667
  ") size",
239668
  "Ġspecial ised",
239669
+ "# ae",
239670
  "Ġmich ael",
239671
  "ĠSTD OUT",
239672
  "ĠP ell",
 
239845
  "ĠEisen hower",
239846
  "File d",
239847
  "Ġhe bt",
239848
+ "# \\",
239849
  "create QueryBuilder",
239850
  "æľī æķĪ",
239851
  "v anced",
 
239861
  "ol vable",
239862
  "Ġal as",
239863
  "(sp ell",
239864
+ "################ ############",
239865
  "Ġdef amation",
239866
  "( Arg",
239867
  "Ġuser Details",
 
241799
  "Ġsup p",
241800
  "pe on",
241801
  "v rier",
241802
+ "################################################ ########",
241803
  "Ġtrot z",
241804
  "Ġmel tdown",
241805
  "ark ers",
 
242491
  "ser ter",
242492
  "Ġstore front",
242493
  "_ ends",
242494
+ "################################################################################ Ċ",
242495
  "ĉg it",
242496
  "D SP",
242497
  "CH AIN",
 
244014
  "ĠST ILL",
244015
  "Qualified Name",
244016
  ". RES",
244017
+ "# c",
244018
  ".w riteln",
244019
  "ĠImmutable List",
244020
  "ĠTh umb",
 
244996
  "in ne",
244997
  "ĠCONST ANTS",
244998
  "_P rice",
244999
+ "# %%",
245000
  "Ġar sch",
245001
  "ĠNS AttributedString",
245002
  "ĠFile Type",
 
245943
  "_P ICTURE",
245944
  "_OR IENTATION",
245945
  "ĠO PP",
245946
+ "# '",
245947
  "áf ico",
245948
  ".h istogram",
245949
  "ĠB enny",
 
246214
  "ew n",
246215
  "Ġpop ulous",
246216
  "ĠSh ed",
246217
+ "# ${",
246218
  "ĠA lo",
246219
  "Device Info",
246220
  "(IN VOKE",
 
249605
  "ĠD ISTRIBUT",
249606
  "é ments",
249607
  "ĠValid ates",
249608
+ "################################################ ############",
249609
  "Ġ| /",
249610
  "Ġes l",
249611
  "Ġré seau",
 
249625
  "ime i",
249626
  ".get Bounds",
249627
  ".Mouse EventHandler",
249628
+ "#### ###",
249629
  "Generic Type",
249630
  "/c ms",
249631
  "Ġturn o",
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "add_eos_token": false,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
@@ -27,7 +28,7 @@
27
  "special": true
28
  },
29
  "151646": {
30
- "content": "<img>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
@@ -35,7 +36,7 @@
35
  "special": true
36
  },
37
  "151647": {
38
- "content": "</img>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
@@ -43,7 +44,7 @@
43
  "special": true
44
  },
45
  "151648": {
46
- "content": "<IMG_CONTEXT>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
@@ -51,7 +52,7 @@
51
  "special": true
52
  },
53
  "151649": {
54
- "content": "<quad>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
@@ -59,7 +60,7 @@
59
  "special": true
60
  },
61
  "151650": {
62
- "content": "</quad>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
@@ -67,7 +68,7 @@
67
  "special": true
68
  },
69
  "151651": {
70
- "content": "<ref>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
@@ -75,7 +76,7 @@
75
  "special": true
76
  },
77
  "151652": {
78
- "content": "</ref>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
@@ -83,7 +84,7 @@
83
  "special": true
84
  },
85
  "151653": {
86
- "content": "<box>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
@@ -91,7 +92,7 @@
91
  "special": true
92
  },
93
  "151654": {
94
- "content": "</box>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
@@ -99,7 +100,7 @@
99
  "special": true
100
  },
101
  "151655": {
102
- "content": "[SEG]",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
@@ -107,7 +108,7 @@
107
  "special": true
108
  },
109
  "151656": {
110
- "content": "<p>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
@@ -115,17 +116,196 @@
115
  "special": true
116
  },
117
  "151657": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  "content": "</p>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
122
  "single_word": false,
123
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  },
126
  "additional_special_tokens": [
127
  "<|im_start|>",
128
  "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
129
  "<img>",
130
  "</img>",
131
  "<IMG_CONTEXT>",
@@ -137,11 +317,11 @@
137
  "</box>"
138
  ],
139
  "bos_token": null,
140
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
141
  "clean_up_tokenization_spaces": false,
142
  "eos_token": "<|im_end|>",
143
  "errors": "replace",
144
- "model_max_length": 8192,
145
  "pad_token": "<|endoftext|>",
146
  "padding_side": "right",
147
  "split_special_tokens": false,
 
1
  {
2
+ "add_bos_token": false,
3
  "add_eos_token": false,
4
  "add_prefix_space": false,
5
  "added_tokens_decoder": {
 
28
  "special": true
29
  },
30
  "151646": {
31
+ "content": "<|object_ref_start|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
 
36
  "special": true
37
  },
38
  "151647": {
39
+ "content": "<|object_ref_end|>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
 
44
  "special": true
45
  },
46
  "151648": {
47
+ "content": "<|box_start|>",
48
  "lstrip": false,
49
  "normalized": false,
50
  "rstrip": false,
 
52
  "special": true
53
  },
54
  "151649": {
55
+ "content": "<|box_end|>",
56
  "lstrip": false,
57
  "normalized": false,
58
  "rstrip": false,
 
60
  "special": true
61
  },
62
  "151650": {
63
+ "content": "<|quad_start|>",
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
 
68
  "special": true
69
  },
70
  "151651": {
71
+ "content": "<|quad_end|>",
72
  "lstrip": false,
73
  "normalized": false,
74
  "rstrip": false,
 
76
  "special": true
77
  },
78
  "151652": {
79
+ "content": "<|vision_start|>",
80
  "lstrip": false,
81
  "normalized": false,
82
  "rstrip": false,
 
84
  "special": true
85
  },
86
  "151653": {
87
+ "content": "<|vision_end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
  "rstrip": false,
 
92
  "special": true
93
  },
94
  "151654": {
95
+ "content": "<|vision_pad|>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
 
100
  "special": true
101
  },
102
  "151655": {
103
+ "content": "<|image_pad|>",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
 
108
  "special": true
109
  },
110
  "151656": {
111
+ "content": "<|video_pad|>",
112
  "lstrip": false,
113
  "normalized": false,
114
  "rstrip": false,
 
116
  "special": true
117
  },
118
  "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
+ },
182
+ "151665": {
183
+ "content": "<img>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ },
190
+ "151666": {
191
+ "content": "</img>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": true
197
+ },
198
+ "151667": {
199
+ "content": "<IMG_CONTEXT>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "151668": {
207
+ "content": "<quad>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "151669": {
215
+ "content": "</quad>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "151670": {
223
+ "content": "<ref>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "151671": {
231
+ "content": "</ref>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "151672": {
239
+ "content": "<box>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "151673": {
247
+ "content": "</box>",
248
+ "lstrip": false,
249
+ "normalized": false,
250
+ "rstrip": false,
251
+ "single_word": false,
252
+ "special": true
253
+ },
254
+ "151674": {
255
+ "content": "[SEG]",
256
+ "lstrip": false,
257
+ "normalized": false,
258
+ "rstrip": false,
259
+ "single_word": false,
260
+ "special": true
261
+ },
262
+ "151675": {
263
+ "content": "<p>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false,
268
+ "special": true
269
+ },
270
+ "151676": {
271
  "content": "</p>",
272
  "lstrip": false,
273
  "normalized": false,
274
  "rstrip": false,
275
  "single_word": false,
276
  "special": true
277
+ },
278
+ "151677": {
279
+ "content": "<vp>",
280
+ "lstrip": false,
281
+ "normalized": false,
282
+ "rstrip": false,
283
+ "single_word": false,
284
+ "special": true
285
+ },
286
+ "151678": {
287
+ "content": "</vp>",
288
+ "lstrip": false,
289
+ "normalized": false,
290
+ "rstrip": false,
291
+ "single_word": false,
292
+ "special": true
293
  }
294
  },
295
  "additional_special_tokens": [
296
  "<|im_start|>",
297
  "<|im_end|>",
298
+ "<|object_ref_start|>",
299
+ "<|object_ref_end|>",
300
+ "<|box_start|>",
301
+ "<|box_end|>",
302
+ "<|quad_start|>",
303
+ "<|quad_end|>",
304
+ "<|vision_start|>",
305
+ "<|vision_end|>",
306
+ "<|vision_pad|>",
307
+ "<|image_pad|>",
308
+ "<|video_pad|>",
309
  "<img>",
310
  "</img>",
311
  "<IMG_CONTEXT>",
 
317
  "</box>"
318
  ],
319
  "bos_token": null,
320
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
321
  "clean_up_tokenization_spaces": false,
322
  "eos_token": "<|im_end|>",
323
  "errors": "replace",
324
+ "model_max_length": 16384,
325
  "pad_token": "<|endoftext|>",
326
  "padding_side": "right",
327
  "split_special_tokens": false,