Upload with huggingface_hub
Browse files- tokenizer/merges.txt +647 -0
- tokenizer/special_tokens_map.json +1 -0
- tokenizer/tokenizer_config.json +1 -0
- tokenizer/vocab.json +1 -0
tokenizer/merges.txt
ADDED
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#version: 0.2
|
2 |
+
Ġ t
|
3 |
+
Ġt h
|
4 |
+
Ġ a
|
5 |
+
Ġth e</w>
|
6 |
+
i n
|
7 |
+
Ġ o
|
8 |
+
Ġ ,</w>
|
9 |
+
Ġ s
|
10 |
+
e d</w>
|
11 |
+
Ġ w
|
12 |
+
e r
|
13 |
+
Ġ .</w>
|
14 |
+
Ġ i
|
15 |
+
r e
|
16 |
+
Ġ c
|
17 |
+
n d</w>
|
18 |
+
Ġ f
|
19 |
+
Ġ b
|
20 |
+
a t
|
21 |
+
Ġo f</w>
|
22 |
+
e r</w>
|
23 |
+
e n
|
24 |
+
a r
|
25 |
+
o r
|
26 |
+
i t
|
27 |
+
Ġ p
|
28 |
+
Ġ h
|
29 |
+
Ġa nd</w>
|
30 |
+
o n
|
31 |
+
in g</w>
|
32 |
+
a n
|
33 |
+
r o
|
34 |
+
Ġ m
|
35 |
+
Ġ d
|
36 |
+
e s</w>
|
37 |
+
Ġi n</w>
|
38 |
+
o n</w>
|
39 |
+
Ġt o</w>
|
40 |
+
o u
|
41 |
+
i s
|
42 |
+
Ġ a</w>
|
43 |
+
i c
|
44 |
+
Ġ T
|
45 |
+
a l
|
46 |
+
Ġ l
|
47 |
+
Ġ =</w>
|
48 |
+
Ġ re
|
49 |
+
Ġ "</w>
|
50 |
+
e s
|
51 |
+
Ġ S
|
52 |
+
a s</w>
|
53 |
+
a l</w>
|
54 |
+
i l
|
55 |
+
e l
|
56 |
+
i on</w>
|
57 |
+
Ġ A
|
58 |
+
Ġ C
|
59 |
+
Ġ 1
|
60 |
+
Ġ Ċ</w>
|
61 |
+
u r
|
62 |
+
ĠT h
|
63 |
+
Ġ n
|
64 |
+
a s
|
65 |
+
Ġ @
|
66 |
+
e c
|
67 |
+
o m
|
68 |
+
a c
|
69 |
+
Ġ e
|
70 |
+
Ġw as</w>
|
71 |
+
Ġ M
|
72 |
+
o r</w>
|
73 |
+
a n</w>
|
74 |
+
a m
|
75 |
+
e n</w>
|
76 |
+
o l
|
77 |
+
Ġ in
|
78 |
+
Ġ g
|
79 |
+
Ġ '</w>
|
80 |
+
Ġ B
|
81 |
+
l y</w>
|
82 |
+
a t</w>
|
83 |
+
i v
|
84 |
+
t s</w>
|
85 |
+
ĠTh e</w>
|
86 |
+
u s
|
87 |
+
- @</w>
|
88 |
+
Ġ@ -@</w>
|
89 |
+
i s</w>
|
90 |
+
Ġ I
|
91 |
+
Ġw h
|
92 |
+
i g
|
93 |
+
Ġ H
|
94 |
+
Ġs t
|
95 |
+
o s
|
96 |
+
u n
|
97 |
+
t h
|
98 |
+
Ġ P
|
99 |
+
Ġw it
|
100 |
+
Ġth at</w>
|
101 |
+
i r
|
102 |
+
Ġa s</w>
|
103 |
+
e m
|
104 |
+
Ġo n</w>
|
105 |
+
r a
|
106 |
+
Ġf or</w>
|
107 |
+
Ġ R
|
108 |
+
e t
|
109 |
+
o w
|
110 |
+
Ġ 2
|
111 |
+
i d
|
112 |
+
Ġ D
|
113 |
+
l e</w>
|
114 |
+
Ġwit h</w>
|
115 |
+
l a
|
116 |
+
en t</w>
|
117 |
+
i m
|
118 |
+
Ġ F
|
119 |
+
e a
|
120 |
+
i on
|
121 |
+
Ġb y</w>
|
122 |
+
Ġ )</w>
|
123 |
+
Ġ (</w>
|
124 |
+
Ġa l
|
125 |
+
Ġc on
|
126 |
+
en t
|
127 |
+
Ġ W
|
128 |
+
Ġi s</w>
|
129 |
+
er e</w>
|
130 |
+
Ġ G
|
131 |
+
Ġ N
|
132 |
+
Ġ L
|
133 |
+
Ġh a
|
134 |
+
er s</w>
|
135 |
+
r i
|
136 |
+
t h</w>
|
137 |
+
t ed</w>
|
138 |
+
u c
|
139 |
+
Ġ J
|
140 |
+
Ġ1 9
|
141 |
+
e v
|
142 |
+
u l
|
143 |
+
Ġ v
|
144 |
+
c e</w>
|
145 |
+
at ion</w>
|
146 |
+
ro m</w>
|
147 |
+
Ġb e
|
148 |
+
Ġ E
|
149 |
+
i n</w>
|
150 |
+
Ġth e
|
151 |
+
Ġf rom</w>
|
152 |
+
Ġ O
|
153 |
+
t er</w>
|
154 |
+
Ġp ro
|
155 |
+
Ġa r
|
156 |
+
a d
|
157 |
+
Ġc om
|
158 |
+
i c</w>
|
159 |
+
a g
|
160 |
+
Ġh is</w>
|
161 |
+
Ġs h
|
162 |
+
Ġa t</w>
|
163 |
+
o v
|
164 |
+
i es</w>
|
165 |
+
o o
|
166 |
+
p p
|
167 |
+
s t
|
168 |
+
c h
|
169 |
+
Ġ r
|
170 |
+
Ġ2 0
|
171 |
+
a y</w>
|
172 |
+
i f
|
173 |
+
Ġw ere</w>
|
174 |
+
Ġc h
|
175 |
+
u t</w>
|
176 |
+
s t</w>
|
177 |
+
u t
|
178 |
+
d s</w>
|
179 |
+
o p
|
180 |
+
u m
|
181 |
+
Ġi t</w>
|
182 |
+
o c
|
183 |
+
t er
|
184 |
+
l e
|
185 |
+
ig h
|
186 |
+
u d
|
187 |
+
Ġe x
|
188 |
+
ion s</w>
|
189 |
+
at e</w>
|
190 |
+
it y</w>
|
191 |
+
at ed</w>
|
192 |
+
Ġ un
|
193 |
+
e p
|
194 |
+
q u
|
195 |
+
Ġn o
|
196 |
+
Ġ K
|
197 |
+
iv e</w>
|
198 |
+
is t
|
199 |
+
Ġo n
|
200 |
+
am e</w>
|
201 |
+
ou n
|
202 |
+
i r</w>
|
203 |
+
a b
|
204 |
+
Ġ â
|
205 |
+
in g
|
206 |
+
Ġh e</w>
|
207 |
+
l d</w>
|
208 |
+
u g
|
209 |
+
ic h</w>
|
210 |
+
Ġa n</w>
|
211 |
+
e d
|
212 |
+
Ġ k
|
213 |
+
Ġâ Ģ
|
214 |
+
Ġha d</w>
|
215 |
+
v e</w>
|
216 |
+
a in
|
217 |
+
Ġs e
|
218 |
+
t ion</w>
|
219 |
+
or e</w>
|
220 |
+
re s
|
221 |
+
Ġwh ich</w>
|
222 |
+
ĠI n</w>
|
223 |
+
o d
|
224 |
+
th er</w>
|
225 |
+
a k
|
226 |
+
Ġs p
|
227 |
+
a r</w>
|
228 |
+
Ġ y
|
229 |
+
ĠC h
|
230 |
+
on g</w>
|
231 |
+
Ġa c
|
232 |
+
es t</w>
|
233 |
+
Ġ U
|
234 |
+
a p
|
235 |
+
f f
|
236 |
+
al ly</w>
|
237 |
+
r it
|
238 |
+
ĠS t
|
239 |
+
u b
|
240 |
+
g e</w>
|
241 |
+
b er</w>
|
242 |
+
e t</w>
|
243 |
+
Ġb e</w>
|
244 |
+
e ar
|
245 |
+
Ġre c
|
246 |
+
er s
|
247 |
+
Ġf ir
|
248 |
+
o t
|
249 |
+
Ġar e</w>
|
250 |
+
Ġa n
|
251 |
+
c h</w>
|
252 |
+
o g
|
253 |
+
i a</w>
|
254 |
+
es t
|
255 |
+
in e</w>
|
256 |
+
il l
|
257 |
+
an d
|
258 |
+
e l</w>
|
259 |
+
ar y</w>
|
260 |
+
e w</w>
|
261 |
+
i d</w>
|
262 |
+
Ġf or
|
263 |
+
Ġ ;</w>
|
264 |
+
Ġcom p
|
265 |
+
Ġ V
|
266 |
+
Ġin c
|
267 |
+
t r
|
268 |
+
Ġ20 0
|
269 |
+
Ġthe ir</w>
|
270 |
+
u s</w>
|
271 |
+
Ġb ut</w>
|
272 |
+
r an
|
273 |
+
ic al</w>
|
274 |
+
Ġfir st</w>
|
275 |
+
Ġd e
|
276 |
+
Ġin t
|
277 |
+
Ġ ro
|
278 |
+
s o</w>
|
279 |
+
ĠâĢ ĵ</w>
|
280 |
+
Ġno t</w>
|
281 |
+
d ing</w>
|
282 |
+
f ter</w>
|
283 |
+
ur e</w>
|
284 |
+
Ġp ar
|
285 |
+
Ġ :</w>
|
286 |
+
i an</w>
|
287 |
+
Ġt w
|
288 |
+
ou ld</w>
|
289 |
+
Ġal so</w>
|
290 |
+
Ġi ts</w>
|
291 |
+
Ġw or
|
292 |
+
u m</w>
|
293 |
+
Ġo r</w>
|
294 |
+
os t</w>
|
295 |
+
0 0</w>
|
296 |
+
ou r
|
297 |
+
ar d</w>
|
298 |
+
Ġre s
|
299 |
+
m p
|
300 |
+
u e</w>
|
301 |
+
Ġa b
|
302 |
+
is h</w>
|
303 |
+
Ġcon t
|
304 |
+
Ġa d
|
305 |
+
ow n</w>
|
306 |
+
al l</w>
|
307 |
+
ou g
|
308 |
+
Ġh er</w>
|
309 |
+
as t</w>
|
310 |
+
Ġ en
|
311 |
+
om e</w>
|
312 |
+
al l
|
313 |
+
d ed</w>
|
314 |
+
o w</w>
|
315 |
+
Ġha ve</w>
|
316 |
+
Ġ us
|
317 |
+
ea r</w>
|
318 |
+
ac k</w>
|
319 |
+
d uc
|
320 |
+
i al</w>
|
321 |
+
s s
|
322 |
+
en ts</w>
|
323 |
+
a in</w>
|
324 |
+
t ing</w>
|
325 |
+
Ġon e</w>
|
326 |
+
es s
|
327 |
+
Ġh as</w>
|
328 |
+
igh t</w>
|
329 |
+
a v
|
330 |
+
Ġe v
|
331 |
+
ou t</w>
|
332 |
+
a y
|
333 |
+
en ce</w>
|
334 |
+
Ġbe en</w>
|
335 |
+
e w
|
336 |
+
Ġtw o</w>
|
337 |
+
Ġc l
|
338 |
+
d er</w>
|
339 |
+
im e</w>
|
340 |
+
k s</w>
|
341 |
+
es s</w>
|
342 |
+
is h
|
343 |
+
. @</w>
|
344 |
+
Ġ@ .@</w>
|
345 |
+
Ġp la
|
346 |
+
Ġp l
|
347 |
+
Ġo r
|
348 |
+
u p</w>
|
349 |
+
m ent</w>
|
350 |
+
ur ing</w>
|
351 |
+
ol l
|
352 |
+
ĠI n
|
353 |
+
Ġth is</w>
|
354 |
+
Ġb ec
|
355 |
+
Ġcom m
|
356 |
+
Ġd is
|
357 |
+
at er</w>
|
358 |
+
ag e</w>
|
359 |
+
Ġa pp
|
360 |
+
ou s</w>
|
361 |
+
e y</w>
|
362 |
+
i l</w>
|
363 |
+
p er
|
364 |
+
ĠA l
|
365 |
+
ion al</w>
|
366 |
+
l ud
|
367 |
+
el y</w>
|
368 |
+
t t
|
369 |
+
il e</w>
|
370 |
+
i z
|
371 |
+
Ġ j
|
372 |
+
Ġwh o</w>
|
373 |
+
Ġa g
|
374 |
+
i b
|
375 |
+
Ġthe y</w>
|
376 |
+
f or
|
377 |
+
Ġo v
|
378 |
+
at h
|
379 |
+
e g
|
380 |
+
Ġs c
|
381 |
+
i p
|
382 |
+
Ġ20 1
|
383 |
+
Ġ 3
|
384 |
+
Ġp er
|
385 |
+
or y</w>
|
386 |
+
Ġd es
|
387 |
+
id e</w>
|
388 |
+
Ġs er
|
389 |
+
s e</w>
|
390 |
+
ĠH e</w>
|
391 |
+
la nd</w>
|
392 |
+
at ions</w>
|
393 |
+
r ic
|
394 |
+
i t</w>
|
395 |
+
re s</w>
|
396 |
+
er ed</w>
|
397 |
+
Ġp re
|
398 |
+
ĠS h
|
399 |
+
an ce</w>
|
400 |
+
or t</w>
|
401 |
+
an t</w>
|
402 |
+
, @</w>
|
403 |
+
Ġ@ ,@</w>
|
404 |
+
el l</w>
|
405 |
+
Ġ Y
|
406 |
+
n ed</w>
|
407 |
+
el l
|
408 |
+
it e</w>
|
409 |
+
Ġinc lud
|
410 |
+
Ġre p
|
411 |
+
Ġa fter</w>
|
412 |
+
Ġs uc
|
413 |
+
re e</w>
|
414 |
+
an y</w>
|
415 |
+
i m</w>
|
416 |
+
or t
|
417 |
+
Ġ1 8
|
418 |
+
Ġs u
|
419 |
+
ad e</w>
|
420 |
+
ou r</w>
|
421 |
+
ĠU n
|
422 |
+
ĠI t</w>
|
423 |
+
i k
|
424 |
+
ĠM ar
|
425 |
+
em ber</w>
|
426 |
+
Ġ 1</w>
|
427 |
+
e en</w>
|
428 |
+
a nd</w>
|
429 |
+
Ġs ec
|
430 |
+
ic e</w>
|
431 |
+
Ġt ime</w>
|
432 |
+
ĠA n
|
433 |
+
Ġint o</w>
|
434 |
+
Ġf in
|
435 |
+
Ġo ther</w>
|
436 |
+
Ġa tt
|
437 |
+
il l</w>
|
438 |
+
re n
|
439 |
+
ac h
|
440 |
+
as s
|
441 |
+
er al</w>
|
442 |
+
es e</w>
|
443 |
+
s h
|
444 |
+
al s</w>
|
445 |
+
it ion</w>
|
446 |
+
oug h</w>
|
447 |
+
l es</w>
|
448 |
+
am p
|
449 |
+
Ġw ould</w>
|
450 |
+
Ġm ore</w>
|
451 |
+
ro ug
|
452 |
+
ri b
|
453 |
+
er y</w>
|
454 |
+
ac e</w>
|
455 |
+
Ġ A</w>
|
456 |
+
Ġpla y
|
457 |
+
it ed</w>
|
458 |
+
k ed</w>
|
459 |
+
is t</w>
|
460 |
+
i ed</w>
|
461 |
+
Ġ 2</w>
|
462 |
+
as ed</w>
|
463 |
+
ing s</w>
|
464 |
+
an g
|
465 |
+
a m</w>
|
466 |
+
i p</w>
|
467 |
+
Ġb o
|
468 |
+
ab le</w>
|
469 |
+
t y</w>
|
470 |
+
Ġch ar
|
471 |
+
Ġc ent
|
472 |
+
et w
|
473 |
+
at es</w>
|
474 |
+
ro p
|
475 |
+
Ġ I</w>
|
476 |
+
u nd</w>
|
477 |
+
ĠA m
|
478 |
+
c es</w>
|
479 |
+
o in
|
480 |
+
Ġin ter
|
481 |
+
u p
|
482 |
+
c t
|
483 |
+
on e</w>
|
484 |
+
Ġt ra
|
485 |
+
an t
|
486 |
+
ec t
|
487 |
+
Ġal l</w>
|
488 |
+
e f
|
489 |
+
Ġcon s
|
490 |
+
ub l
|
491 |
+
n ing</w>
|
492 |
+
an s</w>
|
493 |
+
Ġf e
|
494 |
+
us t</w>
|
495 |
+
Ġ 0
|
496 |
+
Ġre m
|
497 |
+
as e</w>
|
498 |
+
on g
|
499 |
+
Ġwh en</w>
|
500 |
+
e b
|
501 |
+
ĠW h
|
502 |
+
Ġe ar
|
503 |
+
ev er</w>
|
504 |
+
Ġov er</w>
|
505 |
+
Ġk n
|
506 |
+
a us
|
507 |
+
Ġp os
|
508 |
+
a d</w>
|
509 |
+
er m
|
510 |
+
Ġsh e</w>
|
511 |
+
Ġ ra
|
512 |
+
Ġd uring</w>
|
513 |
+
as on</w>
|
514 |
+
v i
|
515 |
+
Ġex p
|
516 |
+
Ġl ea
|
517 |
+
Ġ el
|
518 |
+
Ġ 4
|
519 |
+
Ġon ly</w>
|
520 |
+
o nd</w>
|
521 |
+
Ġd ec
|
522 |
+
Ġac c
|
523 |
+
Ġo ff
|
524 |
+
is s
|
525 |
+
Ġf l
|
526 |
+
ĠE n
|
527 |
+
o t</w>
|
528 |
+
en s
|
529 |
+
os e</w>
|
530 |
+
ak e</w>
|
531 |
+
o m</w>
|
532 |
+
Ġs ev
|
533 |
+
ac h</w>
|
534 |
+
etw een</w>
|
535 |
+
er n
|
536 |
+
Ġ 3</w>
|
537 |
+
Ġp r
|
538 |
+
Ġg ro
|
539 |
+
r uc
|
540 |
+
Ġd i
|
541 |
+
Ġ19 9
|
542 |
+
ĠA r
|
543 |
+
Ġg ame</w>
|
544 |
+
Ġh im</w>
|
545 |
+
oo k</w>
|
546 |
+
Ġ up</w>
|
547 |
+
Ġab out</w>
|
548 |
+
Ġre l
|
549 |
+
for m
|
550 |
+
Ġth ree</w>
|
551 |
+
at t
|
552 |
+
ĠC om
|
553 |
+
Ġs a
|
554 |
+
ear s</w>
|
555 |
+
Ġ 5
|
556 |
+
r y</w>
|
557 |
+
Ġi mp
|
558 |
+
Ġm ost</w>
|
559 |
+
f er
|
560 |
+
Ġp res
|
561 |
+
Ġf il
|
562 |
+
Ġb etween</w>
|
563 |
+
Ġbe g
|
564 |
+
p h
|
565 |
+
or s</w>
|
566 |
+
Ġth an</w>
|
567 |
+
Ġrec or
|
568 |
+
o b
|
569 |
+
er ic
|
570 |
+
at ing</w>
|
571 |
+
Ġth roug
|
572 |
+
k ing</w>
|
573 |
+
Ġo ut</w>
|
574 |
+
Ġn um
|
575 |
+
oo d</w>
|
576 |
+
oll ow
|
577 |
+
ac t
|
578 |
+
u il
|
579 |
+
Ġc re
|
580 |
+
ol og
|
581 |
+
at ional</w>
|
582 |
+
Ġpro duc
|
583 |
+
Ġwh ile</w>
|
584 |
+
Ġl ater</w>
|
585 |
+
Ġw rit
|
586 |
+
e x
|
587 |
+
Ġst ar
|
588 |
+
Ġsp ec
|
589 |
+
e e
|
590 |
+
ish ed</w>
|
591 |
+
Ġre g
|
592 |
+
is ion</w>
|
593 |
+
ou th</w>
|
594 |
+
Ġre le
|
595 |
+
Ġa ss
|
596 |
+
Ġse ason</w>
|
597 |
+
Ġm ade</w>
|
598 |
+
il y</w>
|
599 |
+
r u
|
600 |
+
o y
|
601 |
+
t ur
|
602 |
+
t e</w>
|
603 |
+
Ġ qu
|
604 |
+
Ġm ov
|
605 |
+
ur y</w>
|
606 |
+
ĠAm eric
|
607 |
+
em ent</w>
|
608 |
+
c c
|
609 |
+
ou nd</w>
|
610 |
+
Ġl ar
|
611 |
+
Ġfor m
|
612 |
+
ec t</w>
|
613 |
+
Ġde f
|
614 |
+
Ġm us
|
615 |
+
ĠP ar
|
616 |
+
Ġm e
|
617 |
+
Ġs ub
|
618 |
+
w ay</w>
|
619 |
+
o p</w>
|
620 |
+
o h
|
621 |
+
el d</w>
|
622 |
+
i e</w>
|
623 |
+
em p
|
624 |
+
am es</w>
|
625 |
+
er n</w>
|
626 |
+
Ġn or
|
627 |
+
iv ed</w>
|
628 |
+
ev el
|
629 |
+
Ġsuc h</w>
|
630 |
+
ar ds</w>
|
631 |
+
Ġin d
|
632 |
+
ik e</w>
|
633 |
+
Ġg en
|
634 |
+
er t
|
635 |
+
Ġy ear</w>
|
636 |
+
Ġus ed</w>
|
637 |
+
Ġn ew</w>
|
638 |
+
Ġ 5</w>
|
639 |
+
Ġal b
|
640 |
+
s p
|
641 |
+
y p
|
642 |
+
Ġwit h
|
643 |
+
Ġwh ere</w>
|
644 |
+
ic s</w>
|
645 |
+
ĠTh is</w>
|
646 |
+
Ġthe m</w>
|
647 |
+
w n</w>
|
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
|
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"errors": "replace", "max_len": 77, "bos_token": "<|startoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "pad_token": "<|endoftext|>", "tokenizer_class": "CLIPTokenizer"}
|
tokenizer/vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"!": 2, "!</w>": 345, "\"": 3, "\"</w>": 344, "#": 4, "#</w>": 325, "$": 5, "$</w>": 348, "%": 6, "%</w>": 351, "&": 7, "&</w>": 352, "'": 8, "'</w>": 296, "(": 9, "(</w>": 318, ")": 10, ")</w>": 330, "*": 11, "*</w>": 327, "+": 12, "+</w>": 341, ",": 13, ",</w>": 279, ",@</w>": 754, "-": 14, "-</w>": 276, "-@</w>": 439, ".": 15, ".</w>": 253, ".@</w>": 695, "/": 16, "/</w>": 350, "0": 17, "00</w>": 647, "0</w>": 216, "1": 18, "1</w>": 222, "2": 19, "2</w>": 231, "3": 20, "3</w>": 243, "4": 21, "4</w>": 233, "5": 22, "5</w>": 240, "6": 23, "6</w>": 226, "7": 24, "7</w>": 215, "8": 25, "8</w>": 236, "9": 26, "9</w>": 242, ":": 27, ":</w>": 353, ";": 28, ";</w>": 317, "<": 29, "<</w>": 340, "<|endoftext|>": 1, "<|startoftext|>": 0, "=": 30, "=</w>": 342, ">": 31, "></w>": 300, "?": 32, "?</w>": 346, "@": 33, "@</w>": 320, "A": 34, "A</w>": 227, "B": 35, "B</w>": 258, "C": 36, "C</w>": 239, "D": 37, "D</w>": 255, "E": 38, "E</w>": 246, "F": 39, "F</w>": 213, "G": 40, "G</w>": 283, "H": 41, "H</w>": 219, "I": 42, "I</w>": 237, "J": 43, "J</w>": 251, "K": 44, "K</w>": 254, "L": 45, "L</w>": 218, "M": 46, "M</w>": 234, "N": 47, "N</w>": 238, "O": 48, "O</w>": 265, "P": 49, "P</w>": 245, "Q": 50, "Q</w>": 309, "R": 51, "R</w>": 264, "S": 52, "S</w>": 230, "T": 53, "T</w>": 235, "U": 54, "U</w>": 268, "V": 55, "V</w>": 248, "W": 56, "W</w>": 274, "X": 57, "X</w>": 263, "Y": 58, "Y</w>": 310, "Z": 59, "Z</w>": 207, "[": 60, "[</w>": 270, "\\": 61, "\\</w>": 338, "]": 62, "]</w>": 289, "^": 63, "^</w>": 331, "_": 64, "_</w>": 334, "`": 65, "`</w>": 347, "a": 66, "a</w>": 197, "ab": 555, "able</w>": 820, "ac": 420, "ace</w>": 806, "ach": 791, "ach</w>": 885, "ack</w>": 670, "act": 929, "ad": 508, "ad</w>": 860, "ade</w>": 771, "ag": 511, "age</w>": 710, "ain": 568, "ain</w>": 675, "ak": 577, "ake</w>": 882, "al": 397, "al</w>": 405, "all": 664, "all</w>": 658, "ally</w>": 588, "als</w>": 796, "am": 426, "am</w>": 817, "ame</w>": 552, "ames</w>": 976, "amp": 800, "an": 384, "an</w>": 425, "ance</w>": 751, "and": 609, "and</w>": 780, "ang": 816, "ans</w>": 844, "ant": 837, "ant</w>": 753, "any</w>": 766, "ap": 586, "ar": 376, "ar</w>": 579, "ard</w>": 649, "ards</w>": 982, "ary</w>": 611, "as": 416, "as</w>": 404, "ase</w>": 849, "ased</w>": 814, "ason</w>": 865, "ass": 792, "ast</w>": 661, "at": 372, "at</w>": 434, "ate</w>": 541, "ated</w>": 543, "ater</w>": 709, "ates</w>": 825, "ath": 730, "ating</w>": 922, "ation</w>": 497, "ational</w>": 933, "ations</w>": 744, "att": 903, "aus": 858, "av": 681, "ay": 684, "ay</w>": 523, "b": 67, "b</w>": 212, "ber</w>": 593, "c": 68, "c</w>": 224, "cc": 960, "ce</w>": 496, "ces</w>": 830, "ch": 520, "ch</w>": 603, "ct": 834, "d": 69, "d</w>": 196, "ded</w>": 665, "der</w>": 690, "ding</w>": 633, "ds</w>": 530, "duc": 671, "e": 70, "e</w>": 195, "ea": 471, "ear": 596, "ear</w>": 669, "ears</w>": 906, "eb": 852, "ec": 418, "ect": 838, "ect</w>": 964, "ed": 563, "ed</w>": 362, "ee": 941, "een</w>": 779, "ef": 840, "eg": 731, "el": 407, "el</w>": 610, "eld</w>": 973, "ell": 759, "ell</w>": 756, "ely</w>": 719, "em": 455, "ember</w>": 777, "ement</w>": 959, "emp": 975, "en": 375, "en</w>": 427, "ence</w>": 685, "ens": 880, "ent": 478, "ent</w>": 468, "ents</w>": 674, "ep": 545, "er": 364, "er</w>": 374, "eral</w>": 793, "ere</w>": 481, "ered</w>": 748, "eric": 921, "erm": 861, "ern": 887, "ern</w>": 977, "ers": 598, "ers</w>": 486, "ert": 986, "ery</w>": 805, "es": 402, "es</w>": 388, "ese</w>": 794, "ess": 678, "ess</w>": 693, "est": 606, "est</w>": 584, "et": 460, "et</w>": 594, "etw": 824, "etween</w>": 886, "ev": 493, "evel": 980, "ever</w>": 855, "ew": 687, "ew</w>": 612, "ex": 938, "ey</w>": 713, "f": 71, "f</w>": 209, "fer": 911, "ff": 587, "for": 728, "form": 901, "fter</w>": 634, "g": 72, "g</w>": 214, "ge</w>": 592, "h": 73, "h</w>": 203, "i": 74, "i</w>": 205, "ia</w>": 605, "ial</w>": 672, "ian</w>": 638, "ib": 726, "ic": 395, "ic</w>": 510, "ical</w>": 625, "ice</w>": 782, "ich</w>": 561, "ics</w>": 996, "id": 463, "id</w>": 613, "ide</w>": 739, "ie</w>": 974, "ied</w>": 812, "ies</w>": 516, "if": 524, "ig": 444, "igh": 537, "ight</w>": 680, "ik": 775, "ike</w>": 984, "il": 406, "il</w>": 714, "ile</w>": 721, "ill": 608, "ill</w>": 789, "ily</w>": 950, "im": 469, "im</w>": 767, "ime</w>": 691, "in": 358, "in</w>": 501, "ine</w>": 607, "ing": 557, "ing</w>": 383, "ings</w>": 815, "ion": 472, "ion</w>": 408, "ional</w>": 717, "ions</w>": 540, "ip": 733, "ip</w>": 818, "ir": 453, "ir</w>": 554, "is": 393, "is</w>": 441, "ish": 694, "ish</w>": 654, "ished</w>": 942, "ision</w>": 944, "iss": 876, "ist": 550, "ist</w>": 811, "it": 378, "it</w>": 746, "ite</w>": 760, "ited</w>": 809, "ition</w>": 797, "ity</w>": 542, "iv": 435, "ive</w>": 549, "ived</w>": 979, "iz": 722, "j": 75, "j</w>": 288, "k": 76, "k</w>": 210, "ked</w>": 810, "king</w>": 924, "ks</w>": 692, "l": 77, "l</w>": 201, "la": 467, "land</w>": 743, "ld</w>": 559, "le": 536, "le</w>": 465, "les</w>": 799, "lud": 718, "ly</w>": 433, "m": 78, "m</w>": 202, "ment</w>": 701, "mp": 651, "n": 79, "n</w>": 199, "nd</w>": 369, "ned</w>": 758, "ning</w>": 843, "o": 80, "o</w>": 198, "ob": 920, "oc": 534, "od": 575, "og": 604, "oh": 972, "oin": 831, "ol": 428, "oll": 703, "ollow": 928, "olog": 932, "om": 419, "om</w>": 883, "ome</w>": 663, "on": 382, "on</w>": 390, "ond</w>": 872, "one</w>": 835, "ong": 850, "ong</w>": 582, "oo": 517, "ood</w>": 927, "ook</w>": 897, "op": 531, "op</w>": 971, "or": 377, "or</w>": 424, "ore</w>": 571, "ors</w>": 917, "ort": 768, "ort</w>": 752, "ory</w>": 737, "os": 447, "ose</w>": 881, "ost</w>": 646, "ot": 600, "ot</w>": 879, "ou": 392, "oug": 659, "ough</w>": 798, "ould</w>": 640, "oun": 553, "ound</w>": 961, "our": 648, "our</w>": 772, "ous</w>": 712, "out</w>": 683, "outh</w>": 945, "ov": 515, "ow": 461, "ow</w>": 666, "own</w>": 657, "oy": 952, "p": 81, "p</w>": 217, "per": 715, "ph": 916, "pp": 518, "q": 82, "q</w>": 280, "qu": 546, "r": 83, "r</w>": 204, "ra": 457, "ran": 624, "re": 367, "ree</w>": 765, "ren": 790, "res": 572, "res</w>": 747, "ri": 487, "rib": 804, "ric": 745, "rit": 589, "ro": 385, "rom</w>": 498, "rop": 826, "roug": 803, "ru": 951, "ruc": 891, "ry</w>": 908, "s": 84, "s</w>": 206, "se</w>": 741, "sh": 795, "so</w>": 630, "sp": 992, "ss": 673, "st": 519, "st</w>": 528, "t": 85, "t</w>": 208, "te</w>": 954, "ted</w>": 489, "ter": 535, "ter</w>": 505, "th": 449, "th</w>": 488, "ther</w>": 576, "ting</w>": 676, "tion</w>": 570, "tr": 619, "ts</w>": 436, "tt": 720, "tur": 953, "ty</w>": 821, "u": 86, "u</w>": 229, "ub": 591, "ubl": 842, "uc": 490, "ud": 538, "ue</w>": 652, "ug": 560, "uil": 930, "ul": 494, "um": 532, "um</w>": 644, "un": 448, "und</w>": 828, "up": 833, "up</w>": 700, "ur": 413, "ure</w>": 635, "uring</w>": 702, "ury</w>": 957, "us": 438, "us</w>": 622, "ust</w>": 846, "ut": 529, "ut</w>": 527, "v": 87, "v</w>": 232, "ve</w>": 567, "vi": 866, "w": 88, "w</w>": 250, "way</w>": 970, "wn</w>": 999, "x": 89, "x</w>": 269, "y": 90, "y</w>": 211, "yp": 993, "z": 91, "z</w>": 228, "|": 92, "|</w>": 304, "}": 93, "}</w>": 336, "~": 94, "~</w>": 343, "¡": 95, "¡</w>": 220, "¢": 96, "¢</w>": 306, "£": 97, "£</w>": 323, "¤": 98, "¤</w>": 292, "¥": 99, "¥</w>": 339, "¦": 100, "¦</w>": 303, "§": 101, "§</w>": 275, "¨": 102, "¨</w>": 282, "©": 103, "©</w>": 259, "ª": 104, "ª</w>": 286, "«": 105, "«</w>": 266, "¬": 106, "¬</w>": 319, "®": 107, "®</w>": 329, "¯": 108, "¯</w>": 287, "°": 109, "°</w>": 298, "±": 110, "±</w>": 200, "²": 111, "²</w>": 284, "³": 112, "³</w>": 272, "´": 113, "´</w>": 307, "µ": 114, "µ</w>": 261, "¶": 115, "¶</w>": 301, "·": 116, "·</w>": 326, "¸": 117, "¸</w>": 257, "¹": 118, "¹</w>": 241, "º": 119, "º</w>": 260, "»": 120, "»</w>": 247, "¼": 121, "¼</w>": 305, "½": 122, "½</w>": 294, "¾": 123, "¾</w>": 316, "¿": 124, "¿</w>": 271, "Â": 125, "Ã": 126, "Ä": 127, "Å": 128, "Æ": 129, "Ç": 130, "È": 131, "É": 132, "Ê": 133, "Ë": 134, "Ì": 135, "Í": 136, "Î": 137, "Ï": 138, "Ð": 139, "Ñ": 140, "Ö": 141, "×": 142, "Ø": 143, "Ù": 144, "Ü": 145, "à": 146, "á": 147, "â": 148, "ã": 149, "ä": 150, "å": 151, "æ": 152, "ç": 153, "è": 154, "é": 155, "ë": 156, "ì": 157, "ï": 158, "Ċ": 159, "Ċ</w>": 349, "Ġ": 160, "Ġ\"</w>": 401, "Ġ'</w>": 431, "Ġ(</w>": 475, "Ġ)</w>": 474, "Ġ,</w>": 360, "Ġ.</w>": 365, "Ġ0": 847, "Ġ1": 411, "Ġ18": 769, "Ġ19": 492, "Ġ199": 893, "Ġ1</w>": 778, "Ġ2": 462, "Ġ20": 522, "Ġ200": 620, "Ġ201": 734, "Ġ2</w>": 813, "Ġ3": 735, "Ġ3</w>": 888, "Ġ4": 870, "Ġ5": 907, "Ġ5</w>": 990, "Ġ:</w>": 637, "Ġ;</w>": 615, "Ġ</w>": 333, "Ġ=</w>": 399, "Ġ@": 417, "Ġ@,@</w>": 755, "Ġ@-@</w>": 440, "Ġ@.@</w>": 696, "ĠA": 409, "ĠA</w>": 807, "ĠAl": 716, "ĠAm": 829, "ĠAmeric": 958, "ĠAn": 784, "ĠAr": 894, "ĠB": 432, "ĠC": 410, "ĠCh": 581, "ĠCom": 904, "ĠD": 464, "ĠE": 500, "ĠEn": 878, "ĠF": 470, "ĠG": 482, "ĠH": 445, "ĠHe</w>": 742, "ĠI": 442, "ĠI</w>": 827, "ĠIn": 704, "ĠIn</w>": 574, "ĠIt</w>": 774, "ĠJ": 491, "ĠK": 548, "ĠL": 484, "ĠM": 423, "ĠMar": 776, "ĠN": 483, "ĠO": 504, "ĠP": 450, "ĠPar": 967, "ĠR": 459, "ĠS": 403, "ĠSh": 750, "ĠSt": 590, "ĠT": 396, "ĠTh": 414, "ĠThe</w>": 437, "ĠThis</w>": 997, "ĠU": 585, "ĠUn": 773, "ĠV": 617, "ĠW": 479, "ĠWh": 853, "ĠY": 757, "Ġa": 356, "Ġa</w>": 394, "Ġab": 653, "Ġabout</w>": 899, "Ġac": 583, "Ġacc": 874, "Ġad": 656, "Ġafter</w>": 763, "Ġag": 725, "Ġal": 476, "Ġalb": 991, "Ġall</w>": 839, "Ġalso</w>": 641, "Ġan": 602, "Ġan</w>": 562, "Ġand</w>": 381, "Ġapp": 711, "Ġar": 507, "Ġare</w>": 601, "Ġas</w>": 454, "Ġass": 947, "Ġat</w>": 514, "Ġatt": 788, "Ġb": 371, "Ġbe": 499, "Ġbe</w>": 595, "Ġbec": 706, "Ġbeen</w>": 686, "Ġbeg": 915, "Ġbetween</w>": 914, "Ġbo": 819, "Ġbut</w>": 623, "Ġby</w>": 473, "Ġc": 368, "Ġcent": 823, "Ġch": 526, "Ġchar": 822, "Ġcl": 689, "Ġcom": 509, "Ġcomm": 707, "Ġcomp": 616, "Ġcon": 477, "Ġcons": 841, "Ġcont": 655, "Ġcre": 931, "Ġd": 387, "Ġde": 627, "Ġdec": 873, "Ġdef": 965, "Ġdes": 738, "Ġdi": 892, "Ġdis": 708, "Ġduring</w>": 864, "Ġe": 421, "Ġear": 854, "Ġel": 869, "Ġen": 662, "Ġev": 682, "Ġex": 539, "Ġexp": 867, "Ġf": 370, "Ġfe": 845, "Ġfil": 913, "Ġfin": 786, "Ġfir": 599, "Ġfirst</w>": 626, "Ġfl": 877, "Ġfor": 614, "Ġfor</w>": 458, "Ġform": 963, "Ġfrom</w>": 503, "Ġg": 430, "Ġgame</w>": 895, "Ġgen": 985, "Ġgro": 890, "Ġh": 380, "Ġha": 485, "Ġhad</w>": 566, "Ġhas</w>": 679, "Ġhave</w>": 667, "Ġhe</w>": 558, "Ġher</w>": 660, "Ġhim</w>": 896, "Ġhis</w>": 512, "Ġi": 366, "Ġimp": 909, "Ġin": 429, "Ġin</w>": 389, "Ġinc": 618, "Ġinclud": 761, "Ġind": 983, "Ġint": 628, "Ġinter": 832, "Ġinto</w>": 785, "Ġis</w>": 480, "Ġit</w>": 533, "Ġits</w>": 642, "Ġj": 723, "Ġk": 564, "Ġkn": 857, "Ġl": 398, "Ġlar": 962, "Ġlater</w>": 936, "Ġlea": 868, "Ġm": 386, "Ġmade</w>": 949, "Ġme": 968, "Ġmore</w>": 802, "Ġmost</w>": 910, "Ġmov": 956, "Ġmus": 966, "Ġn": 415, "Ġnew</w>": 989, "Ġno": 547, "Ġnor": 978, "Ġnot</w>": 632, "Ġnum": 926, "Ġo": 359, "Ġof</w>": 373, "Ġoff": 875, "Ġon": 551, "Ġon</w>": 456, "Ġone</w>": 677, "Ġonly</w>": 871, "Ġor": 699, "Ġor</w>": 645, "Ġother</w>": 787, "Ġout</w>": 925, "Ġov": 729, "Ġover</w>": 856, "Ġp": 379, "Ġpar": 636, "Ġper": 736, "Ġpl": 698, "Ġpla": 697, "Ġplay": 808, "Ġpos": 859, "Ġpr": 889, "Ġpre": 749, "Ġpres": 912, "Ġpro": 506, "Ġproduc": 934, "Ġqu": 955, "Ġr": 521, "Ġra": 863, "Ġre": 400, "Ġrec": 597, "Ġrecor": 919, "Ġreg": 943, "Ġrel": 900, "Ġrele": 946, "Ġrem": 848, "Ġrep": 762, "Ġres": 650, "Ġro": 629, "Ġs": 361, "Ġsa": 905, "Ġsc": 732, "Ġse": 569, "Ġseason</w>": 948, "Ġsec": 781, "Ġser": 740, "Ġsev": 884, "Ġsh": 513, "Ġshe</w>": 862, "Ġsp": 578, "Ġspec": 940, "Ġst": 446, "Ġstar": 939, "Ġsu": 770, "Ġsub": 969, "Ġsuc": 764, "Ġsuch</w>": 981, "Ġt": 354, "Ġth": 355, "Ġthan</w>": 918, "Ġthat</w>": 452, "Ġthe": 502, "Ġthe</w>": 357, "Ġtheir</w>": 621, "Ġthem</w>": 998, "Ġthey</w>": 727, "Ġthis</w>": 705, "Ġthree</w>": 902, "Ġthroug": 923, "Ġtime</w>": 783, "Ġto</w>": 391, "Ġtra": 836, "Ġtw": 639, "Ġtwo</w>": 688, "Ġun": 544, "Ġup</w>": 898, "Ġus": 668, "Ġused</w>": 988, "Ġv": 495, "Ġw": 363, "Ġwas</w>": 422, "Ġwere</w>": 525, "Ġwh": 443, "Ġwhen</w>": 851, "Ġwhere</w>": 995, "Ġwhich</w>": 573, "Ġwhile</w>": 935, "Ġwho</w>": 724, "Ġwit": 451, "Ġwith": 994, "Ġwith</w>": 466, "Ġwor": 643, "Ġwould</w>": 801, "Ġwrit": 937, "Ġy": 580, "Ġyear</w>": 987, "Ġâ": 556, "ĠâĢ": 565, "ĠâĢĵ</w>": 631, "ĠĊ</w>": 412, "Ģ": 161, "Ģ</w>": 223, "ģ": 162, "ģ</w>": 273, "Ĥ": 163, "Ĥ</w>": 262, "ĥ": 164, "ĥ</w>": 337, "Ħ": 165, "Ħ</w>": 278, "ħ": 166, "ħ</w>": 281, "Ĩ": 167, "Ĩ</w>": 308, "ĩ": 168, "ĩ</w>": 225, "Ī": 169, "Ī</w>": 221, "ī": 170, "ī</w>": 244, "Ĭ": 171, "Ĭ</w>": 315, "ĭ": 172, "ĭ</w>": 321, "Į": 173, "Į</w>": 324, "į": 174, "į</w>": 302, "İ": 175, "İ</w>": 249, "ı": 176, "ı</w>": 332, "IJ": 177, "IJ</w>": 295, "ij": 178, "ij</w>": 313, "Ĵ": 179, "Ĵ</w>": 328, "ĵ": 180, "ĵ</w>": 312, "Ķ": 181, "Ķ</w>": 256, "ķ": 182, "ķ</w>": 314, "ĸ": 183, "ĸ</w>": 277, "Ĺ": 184, "Ĺ</w>": 322, "ĺ": 185, "ĺ</w>": 285, "Ļ": 186, "Ļ</w>": 267, "ļ": 187, "ļ</w>": 290, "Ľ": 188, "Ľ</w>": 311, "ľ": 189, "ľ</w>": 299, "Ŀ": 190, "Ŀ</w>": 291, "ŀ": 191, "ŀ</w>": 293, "Ł": 192, "Ł</w>": 335, "ł": 193, "ł</w>": 252, "Ń": 194, "Ń</w>": 297}
|