ydshieh HF staff commited on
Commit
0452d34
1 Parent(s): 11b125e

Upload tiny models for CLIPModel

Browse files
config.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "_name_or_path": "temp/dummy/clip/CLIPModel",
4
+ "architectures": [
5
+ "CLIPModel"
6
+ ],
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 1,
9
+ "initializer_factor": 1.0,
10
+ "logit_scale_init_value": 2.6592,
11
+ "model_type": "clip",
12
+ "pad_token_id": 1,
13
+ "projection_dim": 64,
14
+ "text_config": {
15
+ "_name_or_path": "",
16
+ "add_cross_attention": false,
17
+ "architectures": null,
18
+ "attention_dropout": 0.1,
19
+ "bad_words_ids": null,
20
+ "begin_suppress_tokens": null,
21
+ "bos_token_id": 0,
22
+ "chunk_size_feed_forward": 0,
23
+ "cross_attention_hidden_size": null,
24
+ "decoder_start_token_id": null,
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "dropout": 0.1,
28
+ "early_stopping": false,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "eos_token_id": 2,
31
+ "exponential_decay_length_penalty": null,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "hidden_act": "quick_gelu",
36
+ "hidden_size": 32,
37
+ "id2label": {
38
+ "0": "LABEL_0",
39
+ "1": "LABEL_1"
40
+ },
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 37,
44
+ "is_decoder": false,
45
+ "is_encoder_decoder": false,
46
+ "label2id": {
47
+ "LABEL_0": 0,
48
+ "LABEL_1": 1
49
+ },
50
+ "layer_norm_eps": 1e-05,
51
+ "length_penalty": 1.0,
52
+ "max_length": 20,
53
+ "max_position_embeddings": 512,
54
+ "min_length": 0,
55
+ "model_type": "clip_text_model",
56
+ "no_repeat_ngram_size": 0,
57
+ "num_attention_heads": 4,
58
+ "num_beam_groups": 1,
59
+ "num_beams": 1,
60
+ "num_hidden_layers": 5,
61
+ "num_return_sequences": 1,
62
+ "output_attentions": false,
63
+ "output_hidden_states": false,
64
+ "output_scores": false,
65
+ "pad_token_id": 1,
66
+ "prefix": null,
67
+ "problem_type": null,
68
+ "projection_dim": 32,
69
+ "pruned_heads": {},
70
+ "remove_invalid_values": false,
71
+ "repetition_penalty": 1.0,
72
+ "return_dict": true,
73
+ "return_dict_in_generate": false,
74
+ "sep_token_id": null,
75
+ "suppress_tokens": null,
76
+ "task_specific_params": null,
77
+ "temperature": 1.0,
78
+ "tf_legacy_loss": false,
79
+ "tie_encoder_decoder": false,
80
+ "tie_word_embeddings": true,
81
+ "tokenizer_class": null,
82
+ "top_k": 50,
83
+ "top_p": 1.0,
84
+ "torch_dtype": null,
85
+ "torchscript": false,
86
+ "transformers_version": "4.25.0.dev0",
87
+ "typical_p": 1.0,
88
+ "use_bfloat16": false,
89
+ "vocab_size": 1024
90
+ },
91
+ "torch_dtype": "float32",
92
+ "transformers_version": null,
93
+ "vision_config": {
94
+ "_name_or_path": "",
95
+ "add_cross_attention": false,
96
+ "architectures": null,
97
+ "attention_dropout": 0.1,
98
+ "bad_words_ids": null,
99
+ "begin_suppress_tokens": null,
100
+ "bos_token_id": null,
101
+ "chunk_size_feed_forward": 0,
102
+ "cross_attention_hidden_size": null,
103
+ "decoder_start_token_id": null,
104
+ "diversity_penalty": 0.0,
105
+ "do_sample": false,
106
+ "dropout": 0.1,
107
+ "early_stopping": false,
108
+ "encoder_no_repeat_ngram_size": 0,
109
+ "eos_token_id": null,
110
+ "exponential_decay_length_penalty": null,
111
+ "finetuning_task": null,
112
+ "forced_bos_token_id": null,
113
+ "forced_eos_token_id": null,
114
+ "hidden_act": "quick_gelu",
115
+ "hidden_size": 32,
116
+ "id2label": {
117
+ "0": "LABEL_0",
118
+ "1": "LABEL_1"
119
+ },
120
+ "image_size": 30,
121
+ "initializer_factor": 1.0,
122
+ "initializer_range": 0.02,
123
+ "intermediate_size": 37,
124
+ "is_decoder": false,
125
+ "is_encoder_decoder": false,
126
+ "label2id": {
127
+ "LABEL_0": 0,
128
+ "LABEL_1": 1
129
+ },
130
+ "layer_norm_eps": 1e-05,
131
+ "length_penalty": 1.0,
132
+ "max_length": 20,
133
+ "min_length": 0,
134
+ "model_type": "clip_vision_model",
135
+ "no_repeat_ngram_size": 0,
136
+ "num_attention_heads": 4,
137
+ "num_beam_groups": 1,
138
+ "num_beams": 1,
139
+ "num_channels": 3,
140
+ "num_hidden_layers": 5,
141
+ "num_return_sequences": 1,
142
+ "output_attentions": false,
143
+ "output_hidden_states": false,
144
+ "output_scores": false,
145
+ "pad_token_id": null,
146
+ "patch_size": 2,
147
+ "prefix": null,
148
+ "problem_type": null,
149
+ "projection_dim": 32,
150
+ "pruned_heads": {},
151
+ "remove_invalid_values": false,
152
+ "repetition_penalty": 1.0,
153
+ "return_dict": true,
154
+ "return_dict_in_generate": false,
155
+ "sep_token_id": null,
156
+ "suppress_tokens": null,
157
+ "task_specific_params": null,
158
+ "temperature": 1.0,
159
+ "tf_legacy_loss": false,
160
+ "tie_encoder_decoder": false,
161
+ "tie_word_embeddings": true,
162
+ "tokenizer_class": null,
163
+ "top_k": 50,
164
+ "top_p": 1.0,
165
+ "torch_dtype": null,
166
+ "torchscript": false,
167
+ "transformers_version": "4.25.0.dev0",
168
+ "typical_p": 1.0,
169
+ "use_bfloat16": false
170
+ }
171
+ }
merges.txt ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ t h
3
+ th e</w>
4
+ i n
5
+ a n
6
+ e d</w>
7
+ e r
8
+ r e
9
+ a r
10
+ t i
11
+ o n
12
+ e n
13
+ o f</w>
14
+ o r
15
+ an d</w>
16
+ e r</w>
17
+ o n</w>
18
+ i n</w>
19
+ in g</w>
20
+ s t
21
+ r o
22
+ a l
23
+ i t
24
+ t o</w>
25
+ a s</w>
26
+ a t
27
+ e s</w>
28
+ o u
29
+ h i
30
+ a c
31
+ s i
32
+ a t</w>
33
+ r i
34
+ a l</w>
35
+ e l
36
+ a n</w>
37
+ a m
38
+ o r</w>
39
+ s t</w>
40
+ l i
41
+ u r
42
+ e c
43
+ o m
44
+ d i
45
+ w as</w>
46
+ l y</w>
47
+ e n</w>
48
+ e a
49
+ c h
50
+ u n
51
+ ti on</w>
52
+ l a
53
+ i s</w>
54
+ f i
55
+ o l
56
+ d e
57
+ - @</w>
58
+ @ -@</w>
59
+ r a
60
+ v i
61
+ l e</w>
62
+ l o
63
+ s h
64
+ e m
65
+ b e
66
+ th at</w>
67
+ ' s</w>
68
+ c on
69
+ m a
70
+ f or</w>
71
+ h a
72
+ s u
73
+ b y</w>
74
+ it h</w>
75
+ v e</w>
76
+ w ith</w>
77
+ s e</w>
78
+ c h</w>
79
+ th e
80
+ en t
81
+ p o
82
+ c e</w>
83
+ i l
84
+ s e
85
+ en t</w>
86
+ l e
87
+ c om
88
+ s p
89
+ er e</w>
90
+ p ro
91
+ n o
92
+ b u
93
+ w h
94
+ i t</w>
95
+ t h</w>
96
+ v er
97
+ n e
98
+ c a
99
+ i s
100
+ f or
101
+ a g
102
+ er s</w>
103
+ m o
104
+ g h
105
+ f ro
106
+ t ed</w>
107
+ fro m</w>
108
+ ti on
109
+ o p
110
+ hi s</w>
111
+ a d
112
+ a b
113
+ i c
114
+ h e</w>
115
+ ou n
116
+ a s
117
+ t s</w>
118
+ s c
119
+ d e</w>
120
+ o w
121
+ e x
122
+ w hi
123
+ r u
124
+ t er</w>
125
+ a p
126
+ d s</w>
127
+ w ere</w>
128
+ p re
129
+ d u
130
+ g u
131
+ p ar
132
+ i r
133
+ b o
134
+ th er</w>
135
+ q u
136
+ l u
137
+ t er
138
+ t w
139
+ e s
140
+ re c
141
+ p er
142
+ t a
143
+ at e</w>
144
+ v er</w>
145
+ at ed</w>
146
+ d ing</w>
147
+ it y</w>
148
+ m an
149
+ e ar
150
+ s ed</w>
151
+ d ed</w>
152
+ a u
153
+ al l</w>
154
+ am e</w>
155
+ c i
156
+ on e</w>
157
+ in g
158
+ ar e</w>
159
+ a f
160
+ i r</w>
161
+ a tion</w>
162
+ â Ģ
163
+ ha d</w>
164
+ t r
165
+ u l
166
+ l d</w>
167
+ whi ch</w>
168
+ w a
169
+ i m
170
+ l ea
171
+ b e</w>
172
+ t o
173
+ ti m
174
+ fi r
175
+ w or
176
+ on g</w>
177
+ p or
178
+ m ar
179
+ m e
180
+ al ly</w>
181
+ s o</w>
182
+ ou t</w>
183
+ tion s</w>
184
+ it s</w>
185
+ g h</w>
186
+ g e</w>
187
+ b er</w>
188
+ f e
189
+ p u
190
+ s er
191
+ d er
192
+ p l
193
+ s s</w>
194
+ in e</w>
195
+ in c
196
+ m i
197
+ gh t</w>
198
+ g o
199
+ th is</w>
200
+ t ur
201
+ d a
202
+ ro u
203
+ bu t</w>
204
+ u m
205
+ s on</w>
206
+ w e
207
+ v ed</w>
208
+ si on</w>
209
+ k e</w>
210
+ p la
211
+ the ir</w>
212
+ i es</w>
213
+ fir st</w>
214
+ s a
215
+ o c
216
+ at t
217
+ o f
218
+ p e
219
+ no t</w>
220
+ g i
221
+ n a
222
+ ar y</w>
223
+ m u
224
+ l ed</w>
225
+ âĢ ĵ</w>
226
+ h er</w>
227
+ r an
228
+ c o
229
+ the y</w>
230
+ d er</w>
231
+ al i
232
+ al so</w>
233
+ or e</w>
234
+ e p
235
+ ou ld</w>
236
+ af ter</w>
237
+ s hi
238
+ u s</w>
239
+ e t</w>
240
+ ti c
241
+ st or
242
+ w i
243
+ e v
244
+ o ther</w>
245
+ s h</w>
246
+ t ing</w>
247
+ ar d</w>
248
+ t e
249
+ tw o</w>
250
+ n i
251
+ ha ve</w>
252
+ ou r
253
+ com m
254
+ t e</w>
255
+ ac k</w>
256
+ o o
257
+ f in
258
+ s ec
259
+ ent s</w>
260
+ h as</w>
261
+ com p
262
+ b ec
263
+ k s</w>
264
+ con t
265
+ l and</w>
266
+ be en</w>
267
+ en ce</w>
268
+ k ing</w>
269
+ e l</w>
270
+ ag e</w>
271
+ lo w
272
+ m in
273
+ . @</w>
274
+ @ .@</w>
275
+ om e</w>
276
+ m ent</w>
277
+ ch ar
278
+ g e
279
+ at er</w>
280
+ n or
281
+ h o
282
+ ou s</w>
283
+ wh o</w>
284
+ ea r</w>
285
+ sp ec
286
+ c ol
287
+ el y</w>
288
+ t y</w>
289
+ j o
290
+ ur ing</w>
291
+ du c
292
+ b ri
293
+ st r
294
+ c an
295
+ or i
296
+ t ra
297
+ p a
298
+ sh e</w>
299
+ d o
300
+ ti ve</w>
301
+ m on
302
+ ne w</w>
303
+ r it
304
+ tim e</w>
305
+ on s</w>
306
+ s o
307
+ m an</w>
308
+ d ec
309
+ c ent
310
+ l an
311
+ p i
312
+ ou r</w>
313
+ in ter
314
+ f er
315
+ g ra
316
+ g re
317
+ re s</w>
318
+ inc lu
319
+ m il
320
+ d uring</w>
321
+ ow n</w>
322
+ pre s
323
+ j u
324
+ n ed</w>
325
+ el l</w>
326
+ , @</w>
327
+ @ ,@</w>
328
+ it e</w>
329
+ g en
330
+ wh en</w>
331
+ si g
332
+ b i
333
+ re n
334
+ f a
335
+ g a
336
+ pla y
337
+ en g
338
+ tion al</w>
339
+ oun d</w>
340
+ th ou
341
+ m ore</w>
342
+ re e</w>
343
+ em ber</w>
344
+ e i
345
+ s ou
346
+ s ur
347
+ s ti
348
+ c ar
349
+ for m
350
+ l ar
351
+ s es</w>
352
+ t en
353
+ in to</w>
354
+ t u
355
+ c es</w>
356
+ mo st</w>
357
+ k ed</w>
358
+ wa y</w>
359
+ c re
360
+ c oun
361
+ u p</w>
362
+ l es</w>
363
+ ac e</w>
364
+ al s</w>
365
+ k e
366
+ w ould</w>
367
+ an t</w>
368
+ b er
369
+ f u
370
+ it ed</w>
371
+ p ri
372
+ whi le</w>
373
+ o ver</w>
374
+ ing s</w>
375
+ r e</w>
376
+ fi l
377
+ s y
378
+ e st
379
+ ab le</w>
380
+ w n</w>
381
+ s ea
382
+ ac h
383
+ s ing</w>
384
+ in s</w>
385
+ ti c</w>
386
+ i d</w>
387
+ on ly</w>
388
+ at es</w>
389
+ t ri
390
+ v ing</w>
391
+ b a
392
+ v el
393
+ an ce</w>
394
+ st a
395
+ er n</w>
396
+ f ol
397
+ e en</w>
398
+ in ed</w>
399
+ st ru
400
+ un i
401
+ g ame</w>
402
+ la r</w>
403
+ s el
404
+ b li
405
+ u sed</w>
406
+ n ing</w>
407
+ p s</w>
408
+ ti es</w>
409
+ k no
410
+ c or
411
+ f t</w>
412
+ rec or
413
+ b le</w>
414
+ vi e
415
+ y s</w>
416
+ w il
417
+ ic al</w>
418
+ ap p
419
+ t ro
420
+ th ree</w>
421
+ c la
422
+ ol d</w>
423
+ sh ed</w>
424
+ h ea
425
+ ab out</w>
426
+ w rit
427
+ th an</w>
428
+ st e
429
+ l ater</w>
430
+ ar i
431
+ d y</w>
432
+ pu bli
433
+ lo c
434
+ ag a
435
+ th rou
436
+ s si
437
+ en d</w>
438
+ ma y</w>
439
+ an g
440
+ ac h</w>
441
+ v es</w>
442
+ o g
443
+ hi m</w>
444
+ be tw
445
+ thou gh</w>
446
+ betw een</w>
447
+ u m</w>
448
+ st ar
449
+ sc ri
450
+ re a
451
+ on d</w>
452
+ shi p</w>
453
+ o k</w>
454
+ h el
455
+ s ong</w>
456
+ c hi
457
+ ca p
458
+ e ver</w>
459
+ da y</w>
460
+ c ri
461
+ s ome</w>
462
+ b ro
463
+ n o</w>
464
+ th ere</w>
465
+ an s</w>
466
+ al l
467
+ n um
468
+ r ed</w>
469
+ ear s</w>
470
+ st s</w>
471
+ an y</w>
472
+ w ar
473
+ p h
474
+ p p
475
+ g in
476
+ stru c
477
+ am er
478
+ pro duc
479
+ s ch
480
+ c es
481
+ ur e</w>
482
+ at ing</w>
483
+ em p
484
+ t or
485
+ sea son</w>
486
+ for e</w>
487
+ i c</w>
488
+ c ity</w>
489
+ g ro
490
+ fol low
491
+ su b
492
+ b el
493
+ y ear</w>
494
+ c an</w>
495
+ s in
496
+ wh ere</w>
497
+ an d
498
+ ma de</w>
499
+ re lea
500
+ s m
501
+ b l
502
+ t en</w>
503
+ wi th
504
+ s on
505
+ man y</w>
506
+ a re
507
+ e d
508
+ h ow
509
+ amer ic
510
+ ur y</w>
511
+ st u
512
+ mu si
513
+ c u
514
+ n am
515
+ em ent</w>
516
+ su ch</w>
517
+ al bu
518
+ bu il
519
+ be fore</w>
520
+ e f
521
+ ar m
522
+ t on</w>
523
+ the m</w>
524
+ c al
525
+ b ar
526
+ d es</w>
527
+ m at
528
+ gen er
529
+ o d</w>
530
+ ser ies</w>
531
+ c er
532
+ sh o
533
+ en ti
534
+ h er
535
+ o ver
536
+ an n
537
+ w ell</w>
538
+ wor ld</w>
539
+ g an</w>
540
+ e st</w>
541
+ sec ond</w>
542
+ t ers</w>
543
+ si de</w>
544
+ tr an
545
+ l ine</w>
546
+ tur e</w>
547
+ por t</w>
548
+ be ing</w>
549
+ y ears</w>
550
+ bo th</w>
551
+ in di
552
+ the se</w>
553
+ na tional</w>
554
+ hi stor
555
+ f e</w>
556
+ v o
557
+ st ed</w>
558
+ an i
559
+ b as
560
+ po in
561
+ s ing
562
+ fil m</w>
563
+ p en
564
+ su p
565
+ m is
566
+ c ro
567
+ st ri
568
+ l in
569
+ t re
570
+ wa r</w>
571
+ how ever</w>
572
+ y ing</w>
573
+ l ing</w>
574
+ y p
575
+ ec ted</w>
576
+ di rec
577
+ vi sion</w>
578
+ albu m</w>
579
+ th en</w>
580
+ l l</w>
581
+ se ver
582
+ throu gh</w>
583
+ kno wn</w>
584
+ b or
585
+ c ul
586
+ c lu
587
+ st er</w>
588
+ sou th</w>
589
+ r y</w>
590
+ ec t</w>
591
+ lo w</w>
592
+ p r
593
+ s k
594
+ is o
595
+ nor th</w>
596
+ par t</w>
597
+ f ac
598
+ t ly</w>
599
+ per i
600
+ e u
601
+ b att
602
+ st ate</w>
603
+ c ed</w>
604
+ con si
605
+ in f
606
+ po li
607
+ ol og
608
+ ear ly</w>
609
+ po si
610
+ am es</w>
611
+ w in
612
+ de vel
613
+ o b
614
+ v e
615
+ v en</w>
616
+ op er
617
+ g er
618
+ of fi
619
+ char ac
620
+ m s</w>
621
+ hi gh
622
+ a d</w>
623
+ th o
624
+ sever al</w>
625
+ d re
626
+ de scri
627
+ al e</w>
628
+ num ber</w>
629
+ a ir
630
+ inclu ding</w>
631
+ in st</w>
632
+ aga inst</w>
633
+ l s</w>
634
+ su l
635
+ ep iso
636
+ c am
637
+ di f
638
+ so ci
639
+ bec ame</w>
640
+ li ke</w>
641
+ t el
642
+ f our</w>
643
+ âĢ Ķ</w>
644
+ h ou
645
+ jo h
646
+ un ited</w>
647
+ in v
648
+ un der</w>
649
+ no v
650
+ ti v
651
+ su c
652
+ a tions</w>
653
+ ac k
654
+ t or</w>
655
+ r on
656
+ un d</w>
657
+ w s</w>
658
+ f o
659
+ g r
660
+ devel op
661
+ al though</w>
662
+ cont in
663
+ we st</w>
664
+ ori gin
665
+ musi c</w>
666
+ or s</w>
667
+ d on</w>
668
+ cent ury</w>
669
+ w ard</w>
670
+ wor k</w>
671
+ m e</w>
672
+ am i
673
+ ch a
674
+ ver y</w>
675
+ h ar
676
+ di s
677
+ z ed</w>
678
+ d o</w>
679
+ g s</w>
680
+ t ow
681
+ s ol
682
+ follow ing</w>
683
+ li on</w>
684
+ re ma
685
+ n s</w>
686
+ ti sh</w>
687
+ ch ur
688
+ s om
689
+ m p
690
+ t le</w>
691
+ go ver
692
+ d el
693
+ comp le
694
+ c ur
695
+ u se</w>
696
+ b ack</w>
697
+ h u
698
+ st ern</w>
699
+ be gan</w>
700
+ fi el
701
+ au se</w>
702
+ d ra
703
+ p as
704
+ b il
705
+ ca tion</w>
706
+ d ent</w>
707
+ b ed</w>
708
+ bec ause</w>
709
+ an t
710
+ ea m</w>
711
+ p hi
712
+ y o
713
+ contin u
714
+ ta in</w>
715
+ tr y</w>
716
+ f re
717
+ pe op
718
+ cal led</w>
719
+ f ound</w>
720
+ episo de</w>
721
+ de sig
722
+ m or
723
+ se t</w>
724
+ le y</w>
725
+ ea st</w>
726
+ tr ac
727
+ c ra
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 30,
4
+ "width": 30
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 30
26
+ }
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d0ce4dd8f7b4bd5fd35cec79eb5bfdd153357d0dea5a7b109a63294468e64d8
3
+ size 578637
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7714fee94709ec380e321fd721c26b740f7f1e6740b4e87e54ead6d42d3daee0
3
+ size 722684
tokenizer.json ADDED
@@ -0,0 +1,1843 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<|startoftext|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": true,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<|endoftext|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ }
24
+ ],
25
+ "normalizer": {
26
+ "type": "Sequence",
27
+ "normalizers": [
28
+ {
29
+ "type": "NFC"
30
+ },
31
+ {
32
+ "type": "Replace",
33
+ "pattern": {
34
+ "Regex": "\\s+"
35
+ },
36
+ "content": " "
37
+ },
38
+ {
39
+ "type": "Lowercase"
40
+ }
41
+ ]
42
+ },
43
+ "pre_tokenizer": {
44
+ "type": "Sequence",
45
+ "pretokenizers": [
46
+ {
47
+ "type": "Split",
48
+ "pattern": {
49
+ "Regex": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
50
+ },
51
+ "behavior": "Removed",
52
+ "invert": true
53
+ },
54
+ {
55
+ "type": "ByteLevel",
56
+ "add_prefix_space": false,
57
+ "trim_offsets": true,
58
+ "use_regex": true
59
+ }
60
+ ]
61
+ },
62
+ "post_processor": {
63
+ "type": "RobertaProcessing",
64
+ "sep": [
65
+ "<|endoftext|>",
66
+ 1
67
+ ],
68
+ "cls": [
69
+ "<|startoftext|>",
70
+ 0
71
+ ],
72
+ "trim_offsets": false,
73
+ "add_prefix_space": false
74
+ },
75
+ "decoder": {
76
+ "type": "ByteLevel",
77
+ "add_prefix_space": true,
78
+ "trim_offsets": true,
79
+ "use_regex": true
80
+ },
81
+ "model": {
82
+ "type": "BPE",
83
+ "dropout": null,
84
+ "unk_token": "<|endoftext|>",
85
+ "continuing_subword_prefix": "",
86
+ "end_of_word_suffix": "</w>",
87
+ "fuse_unk": false,
88
+ "vocab": {
89
+ "<|startoftext|>": 0,
90
+ "<|endoftext|>": 1,
91
+ "!": 2,
92
+ "\"": 3,
93
+ "#": 4,
94
+ "$": 5,
95
+ "%": 6,
96
+ "&": 7,
97
+ "'": 8,
98
+ "(": 9,
99
+ ")": 10,
100
+ "*": 11,
101
+ "+": 12,
102
+ ",": 13,
103
+ "-": 14,
104
+ ".": 15,
105
+ "/": 16,
106
+ "0": 17,
107
+ "1": 18,
108
+ "2": 19,
109
+ "3": 20,
110
+ "4": 21,
111
+ "5": 22,
112
+ "6": 23,
113
+ "7": 24,
114
+ "8": 25,
115
+ "9": 26,
116
+ ":": 27,
117
+ ";": 28,
118
+ "<": 29,
119
+ "=": 30,
120
+ ">": 31,
121
+ "?": 32,
122
+ "@": 33,
123
+ "[": 34,
124
+ "\\": 35,
125
+ "]": 36,
126
+ "^": 37,
127
+ "_": 38,
128
+ "`": 39,
129
+ "a": 40,
130
+ "b": 41,
131
+ "c": 42,
132
+ "d": 43,
133
+ "e": 44,
134
+ "f": 45,
135
+ "g": 46,
136
+ "h": 47,
137
+ "i": 48,
138
+ "j": 49,
139
+ "k": 50,
140
+ "l": 51,
141
+ "m": 52,
142
+ "n": 53,
143
+ "o": 54,
144
+ "p": 55,
145
+ "q": 56,
146
+ "r": 57,
147
+ "s": 58,
148
+ "t": 59,
149
+ "u": 60,
150
+ "v": 61,
151
+ "w": 62,
152
+ "x": 63,
153
+ "y": 64,
154
+ "z": 65,
155
+ "|": 66,
156
+ "}": 67,
157
+ "~": 68,
158
+ "¡": 69,
159
+ "¢": 70,
160
+ "£": 71,
161
+ "¤": 72,
162
+ "¥": 73,
163
+ "¦": 74,
164
+ "§": 75,
165
+ "¨": 76,
166
+ "©": 77,
167
+ "ª": 78,
168
+ "«": 79,
169
+ "¬": 80,
170
+ "®": 81,
171
+ "¯": 82,
172
+ "°": 83,
173
+ "±": 84,
174
+ "²": 85,
175
+ "³": 86,
176
+ "´": 87,
177
+ "µ": 88,
178
+ "¶": 89,
179
+ "·": 90,
180
+ "¸": 91,
181
+ "¹": 92,
182
+ "º": 93,
183
+ "»": 94,
184
+ "¼": 95,
185
+ "½": 96,
186
+ "¾": 97,
187
+ "¿": 98,
188
+ "Â": 99,
189
+ "Ã": 100,
190
+ "Ä": 101,
191
+ "Å": 102,
192
+ "Æ": 103,
193
+ "Ç": 104,
194
+ "È": 105,
195
+ "É": 106,
196
+ "Ê": 107,
197
+ "Ë": 108,
198
+ "Ì": 109,
199
+ "Í": 110,
200
+ "Î": 111,
201
+ "Ï": 112,
202
+ "Ð": 113,
203
+ "Ñ": 114,
204
+ "Ö": 115,
205
+ "×": 116,
206
+ "Ø": 117,
207
+ "Ù": 118,
208
+ "Ü": 119,
209
+ "à": 120,
210
+ "á": 121,
211
+ "â": 122,
212
+ "ã": 123,
213
+ "ä": 124,
214
+ "å": 125,
215
+ "æ": 126,
216
+ "ç": 127,
217
+ "è": 128,
218
+ "é": 129,
219
+ "ë": 130,
220
+ "ì": 131,
221
+ "ï": 132,
222
+ "Ģ": 133,
223
+ "ģ": 134,
224
+ "Ĥ": 135,
225
+ "ĥ": 136,
226
+ "Ħ": 137,
227
+ "ħ": 138,
228
+ "Ĩ": 139,
229
+ "ĩ": 140,
230
+ "Ī": 141,
231
+ "ī": 142,
232
+ "Ĭ": 143,
233
+ "ĭ": 144,
234
+ "Į": 145,
235
+ "į": 146,
236
+ "İ": 147,
237
+ "ı": 148,
238
+ "IJ": 149,
239
+ "ij": 150,
240
+ "Ĵ": 151,
241
+ "ĵ": 152,
242
+ "Ķ": 153,
243
+ "ķ": 154,
244
+ "ĸ": 155,
245
+ "Ĺ": 156,
246
+ "ĺ": 157,
247
+ "Ļ": 158,
248
+ "ļ": 159,
249
+ "Ľ": 160,
250
+ "ľ": 161,
251
+ "Ŀ": 162,
252
+ "ŀ": 163,
253
+ "Ł": 164,
254
+ "ł": 165,
255
+ "Ń": 166,
256
+ "d</w>": 167,
257
+ "h</w>": 168,
258
+ "y</w>": 169,
259
+ "s</w>": 170,
260
+ "e</w>": 171,
261
+ "l</w>": 172,
262
+ "m</w>": 173,
263
+ "n</w>": 174,
264
+ "g</w>": 175,
265
+ "r</w>": 176,
266
+ "¨</w>": 177,
267
+ "c</w>": 178,
268
+ "±</w>": 179,
269
+ "t</w>": 180,
270
+ "į</w>": 181,
271
+ "o</w>": 182,
272
+ "k</w>": 183,
273
+ "a</w>": 184,
274
+ "i</w>": 185,
275
+ "u</w>": 186,
276
+ "f</w>": 187,
277
+ "©</w>": 188,
278
+ "p</w>": 189,
279
+ "¯</w>": 190,
280
+ "¾</w>": 191,
281
+ "¤</w>": 192,
282
+ "z</w>": 193,
283
+ "w</w>": 194,
284
+ "Ĥ</w>": 195,
285
+ "v</w>": 196,
286
+ "ĩ</w>": 197,
287
+ "»</w>": 198,
288
+ "b</w>": 199,
289
+ "x</w>": 200,
290
+ "°</w>": 201,
291
+ "ª</w>": 202,
292
+ "j</w>": 203,
293
+ "º</w>": 204,
294
+ "«</w>": 205,
295
+ "¡</w>": 206,
296
+ "¹</w>": 207,
297
+ "ĥ</w>": 208,
298
+ "Ĭ</w>": 209,
299
+ "q</w>": 210,
300
+ "·</w>": 211,
301
+ "¥</w>": 212,
302
+ "Ń</w>": 213,
303
+ "¶</w>": 214,
304
+ "¦</w>": 215,
305
+ "IJ</w>": 216,
306
+ "¸</w>": 217,
307
+ "9</w>": 218,
308
+ "Į</w>": 219,
309
+ "Ł</w>": 220,
310
+ "¢</w>": 221,
311
+ "ı</w>": 222,
312
+ "'</w>": 223,
313
+ "¼</w>": 224,
314
+ "µ</w>": 225,
315
+ "§</w>": 226,
316
+ "}</w>": 227,
317
+ ";</w>": 228,
318
+ "Ķ</w>": 229,
319
+ "Ĺ</w>": 230,
320
+ "@</w>": 231,
321
+ "¬</w>": 232,
322
+ "Ļ</w>": 233,
323
+ "Ĵ</w>": 234,
324
+ "³</w>": 235,
325
+ ".</w>": 236,
326
+ "ĺ</w>": 237,
327
+ "Ĩ</w>": 238,
328
+ "Ħ</w>": 239,
329
+ "8</w>": 240,
330
+ "ł</w>": 241,
331
+ "ķ</w>": 242,
332
+ "£</w>": 243,
333
+ "ŀ</w>": 244,
334
+ "Ŀ</w>": 245,
335
+ "²</w>": 246,
336
+ "-</w>": 247,
337
+ "®</w>": 248,
338
+ "¿</w>": 249,
339
+ "ļ</w>": 250,
340
+ "ľ</w>": 251,
341
+ "Ģ</w>": 252,
342
+ "ī</w>": 253,
343
+ "Ľ</w>": 254,
344
+ "%</w>": 255,
345
+ "Ī</w>": 256,
346
+ "´</w>": 257,
347
+ "&</w>": 258,
348
+ "ĭ</w>": 259,
349
+ "ģ</w>": 260,
350
+ "İ</w>": 261,
351
+ "ij</w>": 262,
352
+ "ĵ</w>": 263,
353
+ "=</w>": 264,
354
+ "<</w>": 265,
355
+ "ĸ</w>": 266,
356
+ "`</w>": 267,
357
+ "|</w>": 268,
358
+ "_</w>": 269,
359
+ "ħ</w>": 270,
360
+ "½</w>": 271,
361
+ "/</w>": 272,
362
+ "></w>": 273,
363
+ "$</w>": 274,
364
+ "5</w>": 275,
365
+ ",</w>": 276,
366
+ "!</w>": 277,
367
+ "]</w>": 278,
368
+ ")</w>": 279,
369
+ "7</w>": 280,
370
+ "\"</w>": 281,
371
+ "^</w>": 282,
372
+ "[</w>": 283,
373
+ "4</w>": 284,
374
+ "0</w>": 285,
375
+ "2</w>": 286,
376
+ "~</w>": 287,
377
+ "#</w>": 288,
378
+ "(</w>": 289,
379
+ "6</w>": 290,
380
+ "3</w>": 291,
381
+ ":</w>": 292,
382
+ "1</w>": 293,
383
+ "+</w>": 294,
384
+ "*</w>": 295,
385
+ "?</w>": 296,
386
+ "\\</w>": 297,
387
+ "th": 298,
388
+ "the</w>": 299,
389
+ "in": 300,
390
+ "an": 301,
391
+ "ed</w>": 302,
392
+ "er": 303,
393
+ "re": 304,
394
+ "ar": 305,
395
+ "ti": 306,
396
+ "on": 307,
397
+ "en": 308,
398
+ "of</w>": 309,
399
+ "or": 310,
400
+ "and</w>": 311,
401
+ "er</w>": 312,
402
+ "on</w>": 313,
403
+ "in</w>": 314,
404
+ "ing</w>": 315,
405
+ "st": 316,
406
+ "ro": 317,
407
+ "al": 318,
408
+ "it": 319,
409
+ "to</w>": 320,
410
+ "as</w>": 321,
411
+ "at": 322,
412
+ "es</w>": 323,
413
+ "ou": 324,
414
+ "hi": 325,
415
+ "ac": 326,
416
+ "si": 327,
417
+ "at</w>": 328,
418
+ "ri": 329,
419
+ "al</w>": 330,
420
+ "el": 331,
421
+ "an</w>": 332,
422
+ "am": 333,
423
+ "or</w>": 334,
424
+ "st</w>": 335,
425
+ "li": 336,
426
+ "ur": 337,
427
+ "ec": 338,
428
+ "om": 339,
429
+ "di": 340,
430
+ "was</w>": 341,
431
+ "ly</w>": 342,
432
+ "en</w>": 343,
433
+ "ea": 344,
434
+ "ch": 345,
435
+ "un": 346,
436
+ "tion</w>": 347,
437
+ "la": 348,
438
+ "is</w>": 349,
439
+ "fi": 350,
440
+ "ol": 351,
441
+ "de": 352,
442
+ "-@</w>": 353,
443
+ "@-@</w>": 354,
444
+ "ra": 355,
445
+ "vi": 356,
446
+ "le</w>": 357,
447
+ "lo": 358,
448
+ "sh": 359,
449
+ "em": 360,
450
+ "be": 361,
451
+ "that</w>": 362,
452
+ "'s</w>": 363,
453
+ "con": 364,
454
+ "ma": 365,
455
+ "for</w>": 366,
456
+ "ha": 367,
457
+ "su": 368,
458
+ "by</w>": 369,
459
+ "ith</w>": 370,
460
+ "ve</w>": 371,
461
+ "with</w>": 372,
462
+ "se</w>": 373,
463
+ "ch</w>": 374,
464
+ "the": 375,
465
+ "ent": 376,
466
+ "po": 377,
467
+ "ce</w>": 378,
468
+ "il": 379,
469
+ "se": 380,
470
+ "ent</w>": 381,
471
+ "le": 382,
472
+ "com": 383,
473
+ "sp": 384,
474
+ "ere</w>": 385,
475
+ "pro": 386,
476
+ "no": 387,
477
+ "bu": 388,
478
+ "wh": 389,
479
+ "it</w>": 390,
480
+ "th</w>": 391,
481
+ "ver": 392,
482
+ "ne": 393,
483
+ "ca": 394,
484
+ "is": 395,
485
+ "for": 396,
486
+ "ag": 397,
487
+ "ers</w>": 398,
488
+ "mo": 399,
489
+ "gh": 400,
490
+ "fro": 401,
491
+ "ted</w>": 402,
492
+ "from</w>": 403,
493
+ "tion": 404,
494
+ "op": 405,
495
+ "his</w>": 406,
496
+ "ad": 407,
497
+ "ab": 408,
498
+ "ic": 409,
499
+ "he</w>": 410,
500
+ "oun": 411,
501
+ "as": 412,
502
+ "ts</w>": 413,
503
+ "sc": 414,
504
+ "de</w>": 415,
505
+ "ow": 416,
506
+ "ex": 417,
507
+ "whi": 418,
508
+ "ru": 419,
509
+ "ter</w>": 420,
510
+ "ap": 421,
511
+ "ds</w>": 422,
512
+ "were</w>": 423,
513
+ "pre": 424,
514
+ "du": 425,
515
+ "gu": 426,
516
+ "par": 427,
517
+ "ir": 428,
518
+ "bo": 429,
519
+ "ther</w>": 430,
520
+ "qu": 431,
521
+ "lu": 432,
522
+ "ter": 433,
523
+ "tw": 434,
524
+ "es": 435,
525
+ "rec": 436,
526
+ "per": 437,
527
+ "ta": 438,
528
+ "ate</w>": 439,
529
+ "ver</w>": 440,
530
+ "ated</w>": 441,
531
+ "ding</w>": 442,
532
+ "ity</w>": 443,
533
+ "man": 444,
534
+ "ear": 445,
535
+ "sed</w>": 446,
536
+ "ded</w>": 447,
537
+ "au": 448,
538
+ "all</w>": 449,
539
+ "ame</w>": 450,
540
+ "ci": 451,
541
+ "one</w>": 452,
542
+ "ing": 453,
543
+ "are</w>": 454,
544
+ "af": 455,
545
+ "ir</w>": 456,
546
+ "ation</w>": 457,
547
+ "âĢ": 458,
548
+ "had</w>": 459,
549
+ "tr": 460,
550
+ "ul": 461,
551
+ "ld</w>": 462,
552
+ "which</w>": 463,
553
+ "wa": 464,
554
+ "im": 465,
555
+ "lea": 466,
556
+ "be</w>": 467,
557
+ "to": 468,
558
+ "tim": 469,
559
+ "fir": 470,
560
+ "wor": 471,
561
+ "ong</w>": 472,
562
+ "por": 473,
563
+ "mar": 474,
564
+ "me": 475,
565
+ "ally</w>": 476,
566
+ "so</w>": 477,
567
+ "out</w>": 478,
568
+ "tions</w>": 479,
569
+ "its</w>": 480,
570
+ "gh</w>": 481,
571
+ "ge</w>": 482,
572
+ "ber</w>": 483,
573
+ "fe": 484,
574
+ "pu": 485,
575
+ "ser": 486,
576
+ "der": 487,
577
+ "pl": 488,
578
+ "ss</w>": 489,
579
+ "ine</w>": 490,
580
+ "inc": 491,
581
+ "mi": 492,
582
+ "ght</w>": 493,
583
+ "go": 494,
584
+ "this</w>": 495,
585
+ "tur": 496,
586
+ "da": 497,
587
+ "rou": 498,
588
+ "but</w>": 499,
589
+ "um": 500,
590
+ "son</w>": 501,
591
+ "we": 502,
592
+ "ved</w>": 503,
593
+ "sion</w>": 504,
594
+ "ke</w>": 505,
595
+ "pla": 506,
596
+ "their</w>": 507,
597
+ "ies</w>": 508,
598
+ "first</w>": 509,
599
+ "sa": 510,
600
+ "oc": 511,
601
+ "att": 512,
602
+ "of": 513,
603
+ "pe": 514,
604
+ "not</w>": 515,
605
+ "gi": 516,
606
+ "na": 517,
607
+ "ary</w>": 518,
608
+ "mu": 519,
609
+ "led</w>": 520,
610
+ "âĢĵ</w>": 521,
611
+ "her</w>": 522,
612
+ "ran": 523,
613
+ "co": 524,
614
+ "they</w>": 525,
615
+ "der</w>": 526,
616
+ "ali": 527,
617
+ "also</w>": 528,
618
+ "ore</w>": 529,
619
+ "ep": 530,
620
+ "ould</w>": 531,
621
+ "after</w>": 532,
622
+ "shi": 533,
623
+ "us</w>": 534,
624
+ "et</w>": 535,
625
+ "tic": 536,
626
+ "stor": 537,
627
+ "wi": 538,
628
+ "ev": 539,
629
+ "other</w>": 540,
630
+ "sh</w>": 541,
631
+ "ting</w>": 542,
632
+ "ard</w>": 543,
633
+ "te": 544,
634
+ "two</w>": 545,
635
+ "ni": 546,
636
+ "have</w>": 547,
637
+ "our": 548,
638
+ "comm": 549,
639
+ "te</w>": 550,
640
+ "ack</w>": 551,
641
+ "oo": 552,
642
+ "fin": 553,
643
+ "sec": 554,
644
+ "ents</w>": 555,
645
+ "has</w>": 556,
646
+ "comp": 557,
647
+ "bec": 558,
648
+ "ks</w>": 559,
649
+ "cont": 560,
650
+ "land</w>": 561,
651
+ "been</w>": 562,
652
+ "ence</w>": 563,
653
+ "king</w>": 564,
654
+ "el</w>": 565,
655
+ "age</w>": 566,
656
+ "low": 567,
657
+ "min": 568,
658
+ ".@</w>": 569,
659
+ "@.@</w>": 570,
660
+ "ome</w>": 571,
661
+ "ment</w>": 572,
662
+ "char": 573,
663
+ "ge": 574,
664
+ "ater</w>": 575,
665
+ "nor": 576,
666
+ "ho": 577,
667
+ "ous</w>": 578,
668
+ "who</w>": 579,
669
+ "ear</w>": 580,
670
+ "spec": 581,
671
+ "col": 582,
672
+ "ely</w>": 583,
673
+ "ty</w>": 584,
674
+ "jo": 585,
675
+ "uring</w>": 586,
676
+ "duc": 587,
677
+ "bri": 588,
678
+ "str": 589,
679
+ "can": 590,
680
+ "ori": 591,
681
+ "tra": 592,
682
+ "pa": 593,
683
+ "she</w>": 594,
684
+ "do": 595,
685
+ "tive</w>": 596,
686
+ "mon": 597,
687
+ "new</w>": 598,
688
+ "rit": 599,
689
+ "time</w>": 600,
690
+ "ons</w>": 601,
691
+ "so": 602,
692
+ "man</w>": 603,
693
+ "dec": 604,
694
+ "cent": 605,
695
+ "lan": 606,
696
+ "pi": 607,
697
+ "our</w>": 608,
698
+ "inter": 609,
699
+ "fer": 610,
700
+ "gra": 611,
701
+ "gre": 612,
702
+ "res</w>": 613,
703
+ "inclu": 614,
704
+ "mil": 615,
705
+ "during</w>": 616,
706
+ "own</w>": 617,
707
+ "pres": 618,
708
+ "ju": 619,
709
+ "ned</w>": 620,
710
+ "ell</w>": 621,
711
+ ",@</w>": 622,
712
+ "@,@</w>": 623,
713
+ "ite</w>": 624,
714
+ "gen": 625,
715
+ "when</w>": 626,
716
+ "sig": 627,
717
+ "bi": 628,
718
+ "ren": 629,
719
+ "fa": 630,
720
+ "ga": 631,
721
+ "play": 632,
722
+ "eng": 633,
723
+ "tional</w>": 634,
724
+ "ound</w>": 635,
725
+ "thou": 636,
726
+ "more</w>": 637,
727
+ "ree</w>": 638,
728
+ "ember</w>": 639,
729
+ "ei": 640,
730
+ "sou": 641,
731
+ "sur": 642,
732
+ "sti": 643,
733
+ "car": 644,
734
+ "form": 645,
735
+ "lar": 646,
736
+ "ses</w>": 647,
737
+ "ten": 648,
738
+ "into</w>": 649,
739
+ "tu": 650,
740
+ "ces</w>": 651,
741
+ "most</w>": 652,
742
+ "ked</w>": 653,
743
+ "way</w>": 654,
744
+ "cre": 655,
745
+ "coun": 656,
746
+ "up</w>": 657,
747
+ "les</w>": 658,
748
+ "ace</w>": 659,
749
+ "als</w>": 660,
750
+ "ke": 661,
751
+ "would</w>": 662,
752
+ "ant</w>": 663,
753
+ "ber": 664,
754
+ "fu": 665,
755
+ "ited</w>": 666,
756
+ "pri": 667,
757
+ "while</w>": 668,
758
+ "over</w>": 669,
759
+ "ings</w>": 670,
760
+ "re</w>": 671,
761
+ "fil": 672,
762
+ "sy": 673,
763
+ "est": 674,
764
+ "able</w>": 675,
765
+ "wn</w>": 676,
766
+ "sea": 677,
767
+ "ach": 678,
768
+ "sing</w>": 679,
769
+ "ins</w>": 680,
770
+ "tic</w>": 681,
771
+ "id</w>": 682,
772
+ "only</w>": 683,
773
+ "ates</w>": 684,
774
+ "tri": 685,
775
+ "ving</w>": 686,
776
+ "ba": 687,
777
+ "vel": 688,
778
+ "ance</w>": 689,
779
+ "sta": 690,
780
+ "ern</w>": 691,
781
+ "fol": 692,
782
+ "een</w>": 693,
783
+ "ined</w>": 694,
784
+ "stru": 695,
785
+ "uni": 696,
786
+ "game</w>": 697,
787
+ "lar</w>": 698,
788
+ "sel": 699,
789
+ "bli": 700,
790
+ "used</w>": 701,
791
+ "ning</w>": 702,
792
+ "ps</w>": 703,
793
+ "ties</w>": 704,
794
+ "kno": 705,
795
+ "cor": 706,
796
+ "ft</w>": 707,
797
+ "recor": 708,
798
+ "ble</w>": 709,
799
+ "vie": 710,
800
+ "ys</w>": 711,
801
+ "wil": 712,
802
+ "ical</w>": 713,
803
+ "app": 714,
804
+ "tro": 715,
805
+ "three</w>": 716,
806
+ "cla": 717,
807
+ "old</w>": 718,
808
+ "shed</w>": 719,
809
+ "hea": 720,
810
+ "about</w>": 721,
811
+ "writ": 722,
812
+ "than</w>": 723,
813
+ "ste": 724,
814
+ "later</w>": 725,
815
+ "ari": 726,
816
+ "dy</w>": 727,
817
+ "publi": 728,
818
+ "loc": 729,
819
+ "aga": 730,
820
+ "throu": 731,
821
+ "ssi": 732,
822
+ "end</w>": 733,
823
+ "may</w>": 734,
824
+ "ang": 735,
825
+ "ach</w>": 736,
826
+ "ves</w>": 737,
827
+ "og": 738,
828
+ "him</w>": 739,
829
+ "betw": 740,
830
+ "though</w>": 741,
831
+ "between</w>": 742,
832
+ "um</w>": 743,
833
+ "star": 744,
834
+ "scri": 745,
835
+ "rea": 746,
836
+ "ond</w>": 747,
837
+ "ship</w>": 748,
838
+ "ok</w>": 749,
839
+ "hel": 750,
840
+ "song</w>": 751,
841
+ "chi": 752,
842
+ "cap": 753,
843
+ "ever</w>": 754,
844
+ "day</w>": 755,
845
+ "cri": 756,
846
+ "some</w>": 757,
847
+ "bro": 758,
848
+ "no</w>": 759,
849
+ "there</w>": 760,
850
+ "ans</w>": 761,
851
+ "all": 762,
852
+ "num": 763,
853
+ "red</w>": 764,
854
+ "ears</w>": 765,
855
+ "sts</w>": 766,
856
+ "any</w>": 767,
857
+ "war": 768,
858
+ "ph": 769,
859
+ "pp": 770,
860
+ "gin": 771,
861
+ "struc": 772,
862
+ "amer": 773,
863
+ "produc": 774,
864
+ "sch": 775,
865
+ "ces": 776,
866
+ "ure</w>": 777,
867
+ "ating</w>": 778,
868
+ "emp": 779,
869
+ "tor": 780,
870
+ "season</w>": 781,
871
+ "fore</w>": 782,
872
+ "ic</w>": 783,
873
+ "city</w>": 784,
874
+ "gro": 785,
875
+ "follow": 786,
876
+ "sub": 787,
877
+ "bel": 788,
878
+ "year</w>": 789,
879
+ "can</w>": 790,
880
+ "sin": 791,
881
+ "where</w>": 792,
882
+ "and": 793,
883
+ "made</w>": 794,
884
+ "relea": 795,
885
+ "sm": 796,
886
+ "bl": 797,
887
+ "ten</w>": 798,
888
+ "with": 799,
889
+ "son": 800,
890
+ "many</w>": 801,
891
+ "are": 802,
892
+ "ed": 803,
893
+ "how": 804,
894
+ "americ": 805,
895
+ "ury</w>": 806,
896
+ "stu": 807,
897
+ "musi": 808,
898
+ "cu": 809,
899
+ "nam": 810,
900
+ "ement</w>": 811,
901
+ "such</w>": 812,
902
+ "albu": 813,
903
+ "buil": 814,
904
+ "before</w>": 815,
905
+ "ef": 816,
906
+ "arm": 817,
907
+ "ton</w>": 818,
908
+ "them</w>": 819,
909
+ "cal": 820,
910
+ "bar": 821,
911
+ "des</w>": 822,
912
+ "mat": 823,
913
+ "gener": 824,
914
+ "od</w>": 825,
915
+ "series</w>": 826,
916
+ "cer": 827,
917
+ "sho": 828,
918
+ "enti": 829,
919
+ "her": 830,
920
+ "over": 831,
921
+ "ann": 832,
922
+ "well</w>": 833,
923
+ "world</w>": 834,
924
+ "gan</w>": 835,
925
+ "est</w>": 836,
926
+ "second</w>": 837,
927
+ "ters</w>": 838,
928
+ "side</w>": 839,
929
+ "tran": 840,
930
+ "line</w>": 841,
931
+ "ture</w>": 842,
932
+ "port</w>": 843,
933
+ "being</w>": 844,
934
+ "years</w>": 845,
935
+ "both</w>": 846,
936
+ "indi": 847,
937
+ "these</w>": 848,
938
+ "national</w>": 849,
939
+ "histor": 850,
940
+ "fe</w>": 851,
941
+ "vo": 852,
942
+ "sted</w>": 853,
943
+ "ani": 854,
944
+ "bas": 855,
945
+ "poin": 856,
946
+ "sing": 857,
947
+ "film</w>": 858,
948
+ "pen": 859,
949
+ "sup": 860,
950
+ "mis": 861,
951
+ "cro": 862,
952
+ "stri": 863,
953
+ "lin": 864,
954
+ "tre": 865,
955
+ "war</w>": 866,
956
+ "however</w>": 867,
957
+ "ying</w>": 868,
958
+ "ling</w>": 869,
959
+ "yp": 870,
960
+ "ected</w>": 871,
961
+ "direc": 872,
962
+ "vision</w>": 873,
963
+ "album</w>": 874,
964
+ "then</w>": 875,
965
+ "ll</w>": 876,
966
+ "sever": 877,
967
+ "through</w>": 878,
968
+ "known</w>": 879,
969
+ "bor": 880,
970
+ "cul": 881,
971
+ "clu": 882,
972
+ "ster</w>": 883,
973
+ "south</w>": 884,
974
+ "ry</w>": 885,
975
+ "ect</w>": 886,
976
+ "low</w>": 887,
977
+ "pr": 888,
978
+ "sk": 889,
979
+ "iso": 890,
980
+ "north</w>": 891,
981
+ "part</w>": 892,
982
+ "fac": 893,
983
+ "tly</w>": 894,
984
+ "peri": 895,
985
+ "eu": 896,
986
+ "batt": 897,
987
+ "state</w>": 898,
988
+ "ced</w>": 899,
989
+ "consi": 900,
990
+ "inf": 901,
991
+ "poli": 902,
992
+ "olog": 903,
993
+ "early</w>": 904,
994
+ "posi": 905,
995
+ "ames</w>": 906,
996
+ "win": 907,
997
+ "devel": 908,
998
+ "ob": 909,
999
+ "ve": 910,
1000
+ "ven</w>": 911,
1001
+ "oper": 912,
1002
+ "ger": 913,
1003
+ "offi": 914,
1004
+ "charac": 915,
1005
+ "ms</w>": 916,
1006
+ "high": 917,
1007
+ "ad</w>": 918,
1008
+ "tho": 919,
1009
+ "several</w>": 920,
1010
+ "dre": 921,
1011
+ "descri": 922,
1012
+ "ale</w>": 923,
1013
+ "number</w>": 924,
1014
+ "air": 925,
1015
+ "including</w>": 926,
1016
+ "inst</w>": 927,
1017
+ "against</w>": 928,
1018
+ "ls</w>": 929,
1019
+ "sul": 930,
1020
+ "episo": 931,
1021
+ "cam": 932,
1022
+ "dif": 933,
1023
+ "soci": 934,
1024
+ "became</w>": 935,
1025
+ "like</w>": 936,
1026
+ "tel": 937,
1027
+ "four</w>": 938,
1028
+ "âĢĶ</w>": 939,
1029
+ "hou": 940,
1030
+ "joh": 941,
1031
+ "united</w>": 942,
1032
+ "inv": 943,
1033
+ "under</w>": 944,
1034
+ "nov": 945,
1035
+ "tiv": 946,
1036
+ "suc": 947,
1037
+ "ations</w>": 948,
1038
+ "ack": 949,
1039
+ "tor</w>": 950,
1040
+ "ron": 951,
1041
+ "und</w>": 952,
1042
+ "ws</w>": 953,
1043
+ "fo": 954,
1044
+ "gr": 955,
1045
+ "develop": 956,
1046
+ "although</w>": 957,
1047
+ "contin": 958,
1048
+ "west</w>": 959,
1049
+ "origin": 960,
1050
+ "music</w>": 961,
1051
+ "ors</w>": 962,
1052
+ "don</w>": 963,
1053
+ "century</w>": 964,
1054
+ "ward</w>": 965,
1055
+ "work</w>": 966,
1056
+ "me</w>": 967,
1057
+ "ami": 968,
1058
+ "cha": 969,
1059
+ "very</w>": 970,
1060
+ "har": 971,
1061
+ "dis": 972,
1062
+ "zed</w>": 973,
1063
+ "do</w>": 974,
1064
+ "gs</w>": 975,
1065
+ "tow": 976,
1066
+ "sol": 977,
1067
+ "following</w>": 978,
1068
+ "lion</w>": 979,
1069
+ "rema": 980,
1070
+ "ns</w>": 981,
1071
+ "tish</w>": 982,
1072
+ "chur": 983,
1073
+ "som": 984,
1074
+ "mp": 985,
1075
+ "tle</w>": 986,
1076
+ "gover": 987,
1077
+ "del": 988,
1078
+ "comple": 989,
1079
+ "cur": 990,
1080
+ "use</w>": 991,
1081
+ "back</w>": 992,
1082
+ "hu": 993,
1083
+ "stern</w>": 994,
1084
+ "began</w>": 995,
1085
+ "fiel": 996,
1086
+ "ause</w>": 997,
1087
+ "dra": 998,
1088
+ "pas": 999,
1089
+ "bil": 1000,
1090
+ "cation</w>": 1001,
1091
+ "dent</w>": 1002,
1092
+ "bed</w>": 1003,
1093
+ "because</w>": 1004,
1094
+ "ant": 1005,
1095
+ "eam</w>": 1006,
1096
+ "phi": 1007,
1097
+ "yo": 1008,
1098
+ "continu": 1009,
1099
+ "tain</w>": 1010,
1100
+ "try</w>": 1011,
1101
+ "fre": 1012,
1102
+ "peop": 1013,
1103
+ "called</w>": 1014,
1104
+ "found</w>": 1015,
1105
+ "episode</w>": 1016,
1106
+ "desig": 1017,
1107
+ "mor": 1018,
1108
+ "set</w>": 1019,
1109
+ "ley</w>": 1020,
1110
+ "east</w>": 1021,
1111
+ "trac": 1022,
1112
+ "cra": 1023
1113
+ },
1114
+ "merges": [
1115
+ "t h",
1116
+ "th e</w>",
1117
+ "i n",
1118
+ "a n",
1119
+ "e d</w>",
1120
+ "e r",
1121
+ "r e",
1122
+ "a r",
1123
+ "t i",
1124
+ "o n",
1125
+ "e n",
1126
+ "o f</w>",
1127
+ "o r",
1128
+ "an d</w>",
1129
+ "e r</w>",
1130
+ "o n</w>",
1131
+ "i n</w>",
1132
+ "in g</w>",
1133
+ "s t",
1134
+ "r o",
1135
+ "a l",
1136
+ "i t",
1137
+ "t o</w>",
1138
+ "a s</w>",
1139
+ "a t",
1140
+ "e s</w>",
1141
+ "o u",
1142
+ "h i",
1143
+ "a c",
1144
+ "s i",
1145
+ "a t</w>",
1146
+ "r i",
1147
+ "a l</w>",
1148
+ "e l",
1149
+ "a n</w>",
1150
+ "a m",
1151
+ "o r</w>",
1152
+ "s t</w>",
1153
+ "l i",
1154
+ "u r",
1155
+ "e c",
1156
+ "o m",
1157
+ "d i",
1158
+ "w as</w>",
1159
+ "l y</w>",
1160
+ "e n</w>",
1161
+ "e a",
1162
+ "c h",
1163
+ "u n",
1164
+ "ti on</w>",
1165
+ "l a",
1166
+ "i s</w>",
1167
+ "f i",
1168
+ "o l",
1169
+ "d e",
1170
+ "- @</w>",
1171
+ "@ -@</w>",
1172
+ "r a",
1173
+ "v i",
1174
+ "l e</w>",
1175
+ "l o",
1176
+ "s h",
1177
+ "e m",
1178
+ "b e",
1179
+ "th at</w>",
1180
+ "' s</w>",
1181
+ "c on",
1182
+ "m a",
1183
+ "f or</w>",
1184
+ "h a",
1185
+ "s u",
1186
+ "b y</w>",
1187
+ "it h</w>",
1188
+ "v e</w>",
1189
+ "w ith</w>",
1190
+ "s e</w>",
1191
+ "c h</w>",
1192
+ "th e",
1193
+ "en t",
1194
+ "p o",
1195
+ "c e</w>",
1196
+ "i l",
1197
+ "s e",
1198
+ "en t</w>",
1199
+ "l e",
1200
+ "c om",
1201
+ "s p",
1202
+ "er e</w>",
1203
+ "p ro",
1204
+ "n o",
1205
+ "b u",
1206
+ "w h",
1207
+ "i t</w>",
1208
+ "t h</w>",
1209
+ "v er",
1210
+ "n e",
1211
+ "c a",
1212
+ "i s",
1213
+ "f or",
1214
+ "a g",
1215
+ "er s</w>",
1216
+ "m o",
1217
+ "g h",
1218
+ "f ro",
1219
+ "t ed</w>",
1220
+ "fro m</w>",
1221
+ "ti on",
1222
+ "o p",
1223
+ "hi s</w>",
1224
+ "a d",
1225
+ "a b",
1226
+ "i c",
1227
+ "h e</w>",
1228
+ "ou n",
1229
+ "a s",
1230
+ "t s</w>",
1231
+ "s c",
1232
+ "d e</w>",
1233
+ "o w",
1234
+ "e x",
1235
+ "w hi",
1236
+ "r u",
1237
+ "t er</w>",
1238
+ "a p",
1239
+ "d s</w>",
1240
+ "w ere</w>",
1241
+ "p re",
1242
+ "d u",
1243
+ "g u",
1244
+ "p ar",
1245
+ "i r",
1246
+ "b o",
1247
+ "th er</w>",
1248
+ "q u",
1249
+ "l u",
1250
+ "t er",
1251
+ "t w",
1252
+ "e s",
1253
+ "re c",
1254
+ "p er",
1255
+ "t a",
1256
+ "at e</w>",
1257
+ "v er</w>",
1258
+ "at ed</w>",
1259
+ "d ing</w>",
1260
+ "it y</w>",
1261
+ "m an",
1262
+ "e ar",
1263
+ "s ed</w>",
1264
+ "d ed</w>",
1265
+ "a u",
1266
+ "al l</w>",
1267
+ "am e</w>",
1268
+ "c i",
1269
+ "on e</w>",
1270
+ "in g",
1271
+ "ar e</w>",
1272
+ "a f",
1273
+ "i r</w>",
1274
+ "a tion</w>",
1275
+ "â Ģ",
1276
+ "ha d</w>",
1277
+ "t r",
1278
+ "u l",
1279
+ "l d</w>",
1280
+ "whi ch</w>",
1281
+ "w a",
1282
+ "i m",
1283
+ "l ea",
1284
+ "b e</w>",
1285
+ "t o",
1286
+ "ti m",
1287
+ "fi r",
1288
+ "w or",
1289
+ "on g</w>",
1290
+ "p or",
1291
+ "m ar",
1292
+ "m e",
1293
+ "al ly</w>",
1294
+ "s o</w>",
1295
+ "ou t</w>",
1296
+ "tion s</w>",
1297
+ "it s</w>",
1298
+ "g h</w>",
1299
+ "g e</w>",
1300
+ "b er</w>",
1301
+ "f e",
1302
+ "p u",
1303
+ "s er",
1304
+ "d er",
1305
+ "p l",
1306
+ "s s</w>",
1307
+ "in e</w>",
1308
+ "in c",
1309
+ "m i",
1310
+ "gh t</w>",
1311
+ "g o",
1312
+ "th is</w>",
1313
+ "t ur",
1314
+ "d a",
1315
+ "ro u",
1316
+ "bu t</w>",
1317
+ "u m",
1318
+ "s on</w>",
1319
+ "w e",
1320
+ "v ed</w>",
1321
+ "si on</w>",
1322
+ "k e</w>",
1323
+ "p la",
1324
+ "the ir</w>",
1325
+ "i es</w>",
1326
+ "fir st</w>",
1327
+ "s a",
1328
+ "o c",
1329
+ "at t",
1330
+ "o f",
1331
+ "p e",
1332
+ "no t</w>",
1333
+ "g i",
1334
+ "n a",
1335
+ "ar y</w>",
1336
+ "m u",
1337
+ "l ed</w>",
1338
+ "âĢ ĵ</w>",
1339
+ "h er</w>",
1340
+ "r an",
1341
+ "c o",
1342
+ "the y</w>",
1343
+ "d er</w>",
1344
+ "al i",
1345
+ "al so</w>",
1346
+ "or e</w>",
1347
+ "e p",
1348
+ "ou ld</w>",
1349
+ "af ter</w>",
1350
+ "s hi",
1351
+ "u s</w>",
1352
+ "e t</w>",
1353
+ "ti c",
1354
+ "st or",
1355
+ "w i",
1356
+ "e v",
1357
+ "o ther</w>",
1358
+ "s h</w>",
1359
+ "t ing</w>",
1360
+ "ar d</w>",
1361
+ "t e",
1362
+ "tw o</w>",
1363
+ "n i",
1364
+ "ha ve</w>",
1365
+ "ou r",
1366
+ "com m",
1367
+ "t e</w>",
1368
+ "ac k</w>",
1369
+ "o o",
1370
+ "f in",
1371
+ "s ec",
1372
+ "ent s</w>",
1373
+ "h as</w>",
1374
+ "com p",
1375
+ "b ec",
1376
+ "k s</w>",
1377
+ "con t",
1378
+ "l and</w>",
1379
+ "be en</w>",
1380
+ "en ce</w>",
1381
+ "k ing</w>",
1382
+ "e l</w>",
1383
+ "ag e</w>",
1384
+ "lo w",
1385
+ "m in",
1386
+ ". @</w>",
1387
+ "@ .@</w>",
1388
+ "om e</w>",
1389
+ "m ent</w>",
1390
+ "ch ar",
1391
+ "g e",
1392
+ "at er</w>",
1393
+ "n or",
1394
+ "h o",
1395
+ "ou s</w>",
1396
+ "wh o</w>",
1397
+ "ea r</w>",
1398
+ "sp ec",
1399
+ "c ol",
1400
+ "el y</w>",
1401
+ "t y</w>",
1402
+ "j o",
1403
+ "ur ing</w>",
1404
+ "du c",
1405
+ "b ri",
1406
+ "st r",
1407
+ "c an",
1408
+ "or i",
1409
+ "t ra",
1410
+ "p a",
1411
+ "sh e</w>",
1412
+ "d o",
1413
+ "ti ve</w>",
1414
+ "m on",
1415
+ "ne w</w>",
1416
+ "r it",
1417
+ "tim e</w>",
1418
+ "on s</w>",
1419
+ "s o",
1420
+ "m an</w>",
1421
+ "d ec",
1422
+ "c ent",
1423
+ "l an",
1424
+ "p i",
1425
+ "ou r</w>",
1426
+ "in ter",
1427
+ "f er",
1428
+ "g ra",
1429
+ "g re",
1430
+ "re s</w>",
1431
+ "inc lu",
1432
+ "m il",
1433
+ "d uring</w>",
1434
+ "ow n</w>",
1435
+ "pre s",
1436
+ "j u",
1437
+ "n ed</w>",
1438
+ "el l</w>",
1439
+ ", @</w>",
1440
+ "@ ,@</w>",
1441
+ "it e</w>",
1442
+ "g en",
1443
+ "wh en</w>",
1444
+ "si g",
1445
+ "b i",
1446
+ "re n",
1447
+ "f a",
1448
+ "g a",
1449
+ "pla y",
1450
+ "en g",
1451
+ "tion al</w>",
1452
+ "oun d</w>",
1453
+ "th ou",
1454
+ "m ore</w>",
1455
+ "re e</w>",
1456
+ "em ber</w>",
1457
+ "e i",
1458
+ "s ou",
1459
+ "s ur",
1460
+ "s ti",
1461
+ "c ar",
1462
+ "for m",
1463
+ "l ar",
1464
+ "s es</w>",
1465
+ "t en",
1466
+ "in to</w>",
1467
+ "t u",
1468
+ "c es</w>",
1469
+ "mo st</w>",
1470
+ "k ed</w>",
1471
+ "wa y</w>",
1472
+ "c re",
1473
+ "c oun",
1474
+ "u p</w>",
1475
+ "l es</w>",
1476
+ "ac e</w>",
1477
+ "al s</w>",
1478
+ "k e",
1479
+ "w ould</w>",
1480
+ "an t</w>",
1481
+ "b er",
1482
+ "f u",
1483
+ "it ed</w>",
1484
+ "p ri",
1485
+ "whi le</w>",
1486
+ "o ver</w>",
1487
+ "ing s</w>",
1488
+ "r e</w>",
1489
+ "fi l",
1490
+ "s y",
1491
+ "e st",
1492
+ "ab le</w>",
1493
+ "w n</w>",
1494
+ "s ea",
1495
+ "ac h",
1496
+ "s ing</w>",
1497
+ "in s</w>",
1498
+ "ti c</w>",
1499
+ "i d</w>",
1500
+ "on ly</w>",
1501
+ "at es</w>",
1502
+ "t ri",
1503
+ "v ing</w>",
1504
+ "b a",
1505
+ "v el",
1506
+ "an ce</w>",
1507
+ "st a",
1508
+ "er n</w>",
1509
+ "f ol",
1510
+ "e en</w>",
1511
+ "in ed</w>",
1512
+ "st ru",
1513
+ "un i",
1514
+ "g ame</w>",
1515
+ "la r</w>",
1516
+ "s el",
1517
+ "b li",
1518
+ "u sed</w>",
1519
+ "n ing</w>",
1520
+ "p s</w>",
1521
+ "ti es</w>",
1522
+ "k no",
1523
+ "c or",
1524
+ "f t</w>",
1525
+ "rec or",
1526
+ "b le</w>",
1527
+ "vi e",
1528
+ "y s</w>",
1529
+ "w il",
1530
+ "ic al</w>",
1531
+ "ap p",
1532
+ "t ro",
1533
+ "th ree</w>",
1534
+ "c la",
1535
+ "ol d</w>",
1536
+ "sh ed</w>",
1537
+ "h ea",
1538
+ "ab out</w>",
1539
+ "w rit",
1540
+ "th an</w>",
1541
+ "st e",
1542
+ "l ater</w>",
1543
+ "ar i",
1544
+ "d y</w>",
1545
+ "pu bli",
1546
+ "lo c",
1547
+ "ag a",
1548
+ "th rou",
1549
+ "s si",
1550
+ "en d</w>",
1551
+ "ma y</w>",
1552
+ "an g",
1553
+ "ac h</w>",
1554
+ "v es</w>",
1555
+ "o g",
1556
+ "hi m</w>",
1557
+ "be tw",
1558
+ "thou gh</w>",
1559
+ "betw een</w>",
1560
+ "u m</w>",
1561
+ "st ar",
1562
+ "sc ri",
1563
+ "re a",
1564
+ "on d</w>",
1565
+ "shi p</w>",
1566
+ "o k</w>",
1567
+ "h el",
1568
+ "s ong</w>",
1569
+ "c hi",
1570
+ "ca p",
1571
+ "e ver</w>",
1572
+ "da y</w>",
1573
+ "c ri",
1574
+ "s ome</w>",
1575
+ "b ro",
1576
+ "n o</w>",
1577
+ "th ere</w>",
1578
+ "an s</w>",
1579
+ "al l",
1580
+ "n um",
1581
+ "r ed</w>",
1582
+ "ear s</w>",
1583
+ "st s</w>",
1584
+ "an y</w>",
1585
+ "w ar",
1586
+ "p h",
1587
+ "p p",
1588
+ "g in",
1589
+ "stru c",
1590
+ "am er",
1591
+ "pro duc",
1592
+ "s ch",
1593
+ "c es",
1594
+ "ur e</w>",
1595
+ "at ing</w>",
1596
+ "em p",
1597
+ "t or",
1598
+ "sea son</w>",
1599
+ "for e</w>",
1600
+ "i c</w>",
1601
+ "c ity</w>",
1602
+ "g ro",
1603
+ "fol low",
1604
+ "su b",
1605
+ "b el",
1606
+ "y ear</w>",
1607
+ "c an</w>",
1608
+ "s in",
1609
+ "wh ere</w>",
1610
+ "an d",
1611
+ "ma de</w>",
1612
+ "re lea",
1613
+ "s m",
1614
+ "b l",
1615
+ "t en</w>",
1616
+ "wi th",
1617
+ "s on",
1618
+ "man y</w>",
1619
+ "a re",
1620
+ "e d",
1621
+ "h ow",
1622
+ "amer ic",
1623
+ "ur y</w>",
1624
+ "st u",
1625
+ "mu si",
1626
+ "c u",
1627
+ "n am",
1628
+ "em ent</w>",
1629
+ "su ch</w>",
1630
+ "al bu",
1631
+ "bu il",
1632
+ "be fore</w>",
1633
+ "e f",
1634
+ "ar m",
1635
+ "t on</w>",
1636
+ "the m</w>",
1637
+ "c al",
1638
+ "b ar",
1639
+ "d es</w>",
1640
+ "m at",
1641
+ "gen er",
1642
+ "o d</w>",
1643
+ "ser ies</w>",
1644
+ "c er",
1645
+ "sh o",
1646
+ "en ti",
1647
+ "h er",
1648
+ "o ver",
1649
+ "an n",
1650
+ "w ell</w>",
1651
+ "wor ld</w>",
1652
+ "g an</w>",
1653
+ "e st</w>",
1654
+ "sec ond</w>",
1655
+ "t ers</w>",
1656
+ "si de</w>",
1657
+ "tr an",
1658
+ "l ine</w>",
1659
+ "tur e</w>",
1660
+ "por t</w>",
1661
+ "be ing</w>",
1662
+ "y ears</w>",
1663
+ "bo th</w>",
1664
+ "in di",
1665
+ "the se</w>",
1666
+ "na tional</w>",
1667
+ "hi stor",
1668
+ "f e</w>",
1669
+ "v o",
1670
+ "st ed</w>",
1671
+ "an i",
1672
+ "b as",
1673
+ "po in",
1674
+ "s ing",
1675
+ "fil m</w>",
1676
+ "p en",
1677
+ "su p",
1678
+ "m is",
1679
+ "c ro",
1680
+ "st ri",
1681
+ "l in",
1682
+ "t re",
1683
+ "wa r</w>",
1684
+ "how ever</w>",
1685
+ "y ing</w>",
1686
+ "l ing</w>",
1687
+ "y p",
1688
+ "ec ted</w>",
1689
+ "di rec",
1690
+ "vi sion</w>",
1691
+ "albu m</w>",
1692
+ "th en</w>",
1693
+ "l l</w>",
1694
+ "se ver",
1695
+ "throu gh</w>",
1696
+ "kno wn</w>",
1697
+ "b or",
1698
+ "c ul",
1699
+ "c lu",
1700
+ "st er</w>",
1701
+ "sou th</w>",
1702
+ "r y</w>",
1703
+ "ec t</w>",
1704
+ "lo w</w>",
1705
+ "p r",
1706
+ "s k",
1707
+ "is o",
1708
+ "nor th</w>",
1709
+ "par t</w>",
1710
+ "f ac",
1711
+ "t ly</w>",
1712
+ "per i",
1713
+ "e u",
1714
+ "b att",
1715
+ "st ate</w>",
1716
+ "c ed</w>",
1717
+ "con si",
1718
+ "in f",
1719
+ "po li",
1720
+ "ol og",
1721
+ "ear ly</w>",
1722
+ "po si",
1723
+ "am es</w>",
1724
+ "w in",
1725
+ "de vel",
1726
+ "o b",
1727
+ "v e",
1728
+ "v en</w>",
1729
+ "op er",
1730
+ "g er",
1731
+ "of fi",
1732
+ "char ac",
1733
+ "m s</w>",
1734
+ "hi gh",
1735
+ "a d</w>",
1736
+ "th o",
1737
+ "sever al</w>",
1738
+ "d re",
1739
+ "de scri",
1740
+ "al e</w>",
1741
+ "num ber</w>",
1742
+ "a ir",
1743
+ "inclu ding</w>",
1744
+ "in st</w>",
1745
+ "aga inst</w>",
1746
+ "l s</w>",
1747
+ "su l",
1748
+ "ep iso",
1749
+ "c am",
1750
+ "di f",
1751
+ "so ci",
1752
+ "bec ame</w>",
1753
+ "li ke</w>",
1754
+ "t el",
1755
+ "f our</w>",
1756
+ "âĢ Ķ</w>",
1757
+ "h ou",
1758
+ "jo h",
1759
+ "un ited</w>",
1760
+ "in v",
1761
+ "un der</w>",
1762
+ "no v",
1763
+ "ti v",
1764
+ "su c",
1765
+ "a tions</w>",
1766
+ "ac k",
1767
+ "t or</w>",
1768
+ "r on",
1769
+ "un d</w>",
1770
+ "w s</w>",
1771
+ "f o",
1772
+ "g r",
1773
+ "devel op",
1774
+ "al though</w>",
1775
+ "cont in",
1776
+ "we st</w>",
1777
+ "ori gin",
1778
+ "musi c</w>",
1779
+ "or s</w>",
1780
+ "d on</w>",
1781
+ "cent ury</w>",
1782
+ "w ard</w>",
1783
+ "wor k</w>",
1784
+ "m e</w>",
1785
+ "am i",
1786
+ "ch a",
1787
+ "ver y</w>",
1788
+ "h ar",
1789
+ "di s",
1790
+ "z ed</w>",
1791
+ "d o</w>",
1792
+ "g s</w>",
1793
+ "t ow",
1794
+ "s ol",
1795
+ "follow ing</w>",
1796
+ "li on</w>",
1797
+ "re ma",
1798
+ "n s</w>",
1799
+ "ti sh</w>",
1800
+ "ch ur",
1801
+ "s om",
1802
+ "m p",
1803
+ "t le</w>",
1804
+ "go ver",
1805
+ "d el",
1806
+ "comp le",
1807
+ "c ur",
1808
+ "u se</w>",
1809
+ "b ack</w>",
1810
+ "h u",
1811
+ "st ern</w>",
1812
+ "be gan</w>",
1813
+ "fi el",
1814
+ "au se</w>",
1815
+ "d ra",
1816
+ "p as",
1817
+ "b il",
1818
+ "ca tion</w>",
1819
+ "d ent</w>",
1820
+ "b ed</w>",
1821
+ "bec ause</w>",
1822
+ "an t",
1823
+ "ea m</w>",
1824
+ "p hi",
1825
+ "y o",
1826
+ "contin u",
1827
+ "ta in</w>",
1828
+ "tr y</w>",
1829
+ "f re",
1830
+ "pe op",
1831
+ "cal led</w>",
1832
+ "f ound</w>",
1833
+ "episo de</w>",
1834
+ "de sig",
1835
+ "m or",
1836
+ "se t</w>",
1837
+ "le y</w>",
1838
+ "ea st</w>",
1839
+ "tr ac",
1840
+ "c ra"
1841
+ ]
1842
+ }
1843
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "do_lower_case": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 77,
22
+ "name_or_path": "temp/dummy/clip/processors",
23
+ "pad_token": "<|endoftext|>",
24
+ "special_tokens_map_file": "/home/huggingface/.cache/huggingface/hub/models--openai--clip-vit-base-patch32/snapshots/e6a30b603a447e251fdaca1c3056b2a16cdfebeb/special_tokens_map.json",
25
+ "tokenizer_class": "CLIPTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
vocab.json ADDED
@@ -0,0 +1,1026 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "!": 2,
3
+ "!</w>": 277,
4
+ "\"": 3,
5
+ "\"</w>": 281,
6
+ "#": 4,
7
+ "#</w>": 288,
8
+ "$": 5,
9
+ "$</w>": 274,
10
+ "%": 6,
11
+ "%</w>": 255,
12
+ "&": 7,
13
+ "&</w>": 258,
14
+ "'": 8,
15
+ "'</w>": 223,
16
+ "'s</w>": 363,
17
+ "(": 9,
18
+ "(</w>": 289,
19
+ ")": 10,
20
+ ")</w>": 279,
21
+ "*": 11,
22
+ "*</w>": 295,
23
+ "+": 12,
24
+ "+</w>": 294,
25
+ ",": 13,
26
+ ",</w>": 276,
27
+ ",@</w>": 622,
28
+ "-": 14,
29
+ "-</w>": 247,
30
+ "-@</w>": 353,
31
+ ".": 15,
32
+ ".</w>": 236,
33
+ ".@</w>": 569,
34
+ "/": 16,
35
+ "/</w>": 272,
36
+ "0": 17,
37
+ "0</w>": 285,
38
+ "1": 18,
39
+ "1</w>": 293,
40
+ "2": 19,
41
+ "2</w>": 286,
42
+ "3": 20,
43
+ "3</w>": 291,
44
+ "4": 21,
45
+ "4</w>": 284,
46
+ "5": 22,
47
+ "5</w>": 275,
48
+ "6": 23,
49
+ "6</w>": 290,
50
+ "7": 24,
51
+ "7</w>": 280,
52
+ "8": 25,
53
+ "8</w>": 240,
54
+ "9": 26,
55
+ "9</w>": 218,
56
+ ":": 27,
57
+ ":</w>": 292,
58
+ ";": 28,
59
+ ";</w>": 228,
60
+ "<": 29,
61
+ "<</w>": 265,
62
+ "<|endoftext|>": 1,
63
+ "<|startoftext|>": 0,
64
+ "=": 30,
65
+ "=</w>": 264,
66
+ ">": 31,
67
+ "></w>": 273,
68
+ "?": 32,
69
+ "?</w>": 296,
70
+ "@": 33,
71
+ "@,@</w>": 623,
72
+ "@-@</w>": 354,
73
+ "@.@</w>": 570,
74
+ "@</w>": 231,
75
+ "[": 34,
76
+ "[</w>": 283,
77
+ "\\": 35,
78
+ "\\</w>": 297,
79
+ "]": 36,
80
+ "]</w>": 278,
81
+ "^": 37,
82
+ "^</w>": 282,
83
+ "_": 38,
84
+ "_</w>": 269,
85
+ "`": 39,
86
+ "`</w>": 267,
87
+ "a": 40,
88
+ "a</w>": 184,
89
+ "ab": 408,
90
+ "able</w>": 675,
91
+ "about</w>": 721,
92
+ "ac": 326,
93
+ "ace</w>": 659,
94
+ "ach": 678,
95
+ "ach</w>": 736,
96
+ "ack": 949,
97
+ "ack</w>": 551,
98
+ "ad": 407,
99
+ "ad</w>": 918,
100
+ "af": 455,
101
+ "after</w>": 532,
102
+ "ag": 397,
103
+ "aga": 730,
104
+ "against</w>": 928,
105
+ "age</w>": 566,
106
+ "air": 925,
107
+ "al": 318,
108
+ "al</w>": 330,
109
+ "albu": 813,
110
+ "album</w>": 874,
111
+ "ale</w>": 923,
112
+ "ali": 527,
113
+ "all": 762,
114
+ "all</w>": 449,
115
+ "ally</w>": 476,
116
+ "als</w>": 660,
117
+ "also</w>": 528,
118
+ "although</w>": 957,
119
+ "am": 333,
120
+ "ame</w>": 450,
121
+ "amer": 773,
122
+ "americ": 805,
123
+ "ames</w>": 906,
124
+ "ami": 968,
125
+ "an": 301,
126
+ "an</w>": 332,
127
+ "ance</w>": 689,
128
+ "and": 793,
129
+ "and</w>": 311,
130
+ "ang": 735,
131
+ "ani": 854,
132
+ "ann": 832,
133
+ "ans</w>": 761,
134
+ "ant": 1005,
135
+ "ant</w>": 663,
136
+ "any</w>": 767,
137
+ "ap": 421,
138
+ "app": 714,
139
+ "ar": 305,
140
+ "ard</w>": 543,
141
+ "are": 802,
142
+ "are</w>": 454,
143
+ "ari": 726,
144
+ "arm": 817,
145
+ "ary</w>": 518,
146
+ "as": 412,
147
+ "as</w>": 321,
148
+ "at": 322,
149
+ "at</w>": 328,
150
+ "ate</w>": 439,
151
+ "ated</w>": 441,
152
+ "ater</w>": 575,
153
+ "ates</w>": 684,
154
+ "ating</w>": 778,
155
+ "ation</w>": 457,
156
+ "ations</w>": 948,
157
+ "att": 512,
158
+ "au": 448,
159
+ "ause</w>": 997,
160
+ "b": 41,
161
+ "b</w>": 199,
162
+ "ba": 687,
163
+ "back</w>": 992,
164
+ "bar": 821,
165
+ "bas": 855,
166
+ "batt": 897,
167
+ "be": 361,
168
+ "be</w>": 467,
169
+ "bec": 558,
170
+ "became</w>": 935,
171
+ "because</w>": 1004,
172
+ "bed</w>": 1003,
173
+ "been</w>": 562,
174
+ "before</w>": 815,
175
+ "began</w>": 995,
176
+ "being</w>": 844,
177
+ "bel": 788,
178
+ "ber": 664,
179
+ "ber</w>": 483,
180
+ "betw": 740,
181
+ "between</w>": 742,
182
+ "bi": 628,
183
+ "bil": 1000,
184
+ "bl": 797,
185
+ "ble</w>": 709,
186
+ "bli": 700,
187
+ "bo": 429,
188
+ "bor": 880,
189
+ "both</w>": 846,
190
+ "bri": 588,
191
+ "bro": 758,
192
+ "bu": 388,
193
+ "buil": 814,
194
+ "but</w>": 499,
195
+ "by</w>": 369,
196
+ "c": 42,
197
+ "c</w>": 178,
198
+ "ca": 394,
199
+ "cal": 820,
200
+ "called</w>": 1014,
201
+ "cam": 932,
202
+ "can": 590,
203
+ "can</w>": 790,
204
+ "cap": 753,
205
+ "car": 644,
206
+ "cation</w>": 1001,
207
+ "ce</w>": 378,
208
+ "ced</w>": 899,
209
+ "cent": 605,
210
+ "century</w>": 964,
211
+ "cer": 827,
212
+ "ces": 776,
213
+ "ces</w>": 651,
214
+ "ch": 345,
215
+ "ch</w>": 374,
216
+ "cha": 969,
217
+ "char": 573,
218
+ "charac": 915,
219
+ "chi": 752,
220
+ "chur": 983,
221
+ "ci": 451,
222
+ "city</w>": 784,
223
+ "cla": 717,
224
+ "clu": 882,
225
+ "co": 524,
226
+ "col": 582,
227
+ "com": 383,
228
+ "comm": 549,
229
+ "comp": 557,
230
+ "comple": 989,
231
+ "con": 364,
232
+ "consi": 900,
233
+ "cont": 560,
234
+ "contin": 958,
235
+ "continu": 1009,
236
+ "cor": 706,
237
+ "coun": 656,
238
+ "cra": 1023,
239
+ "cre": 655,
240
+ "cri": 756,
241
+ "cro": 862,
242
+ "cu": 809,
243
+ "cul": 881,
244
+ "cur": 990,
245
+ "d": 43,
246
+ "d</w>": 167,
247
+ "da": 497,
248
+ "day</w>": 755,
249
+ "de": 352,
250
+ "de</w>": 415,
251
+ "dec": 604,
252
+ "ded</w>": 447,
253
+ "del": 988,
254
+ "dent</w>": 1002,
255
+ "der": 487,
256
+ "der</w>": 526,
257
+ "des</w>": 822,
258
+ "descri": 922,
259
+ "desig": 1017,
260
+ "devel": 908,
261
+ "develop": 956,
262
+ "di": 340,
263
+ "dif": 933,
264
+ "ding</w>": 442,
265
+ "direc": 872,
266
+ "dis": 972,
267
+ "do": 595,
268
+ "do</w>": 974,
269
+ "don</w>": 963,
270
+ "dra": 998,
271
+ "dre": 921,
272
+ "ds</w>": 422,
273
+ "du": 425,
274
+ "duc": 587,
275
+ "during</w>": 616,
276
+ "dy</w>": 727,
277
+ "e": 44,
278
+ "e</w>": 171,
279
+ "ea": 344,
280
+ "eam</w>": 1006,
281
+ "ear": 445,
282
+ "ear</w>": 580,
283
+ "early</w>": 904,
284
+ "ears</w>": 765,
285
+ "east</w>": 1021,
286
+ "ec": 338,
287
+ "ect</w>": 886,
288
+ "ected</w>": 871,
289
+ "ed": 803,
290
+ "ed</w>": 302,
291
+ "een</w>": 693,
292
+ "ef": 816,
293
+ "ei": 640,
294
+ "el": 331,
295
+ "el</w>": 565,
296
+ "ell</w>": 621,
297
+ "ely</w>": 583,
298
+ "em": 360,
299
+ "ember</w>": 639,
300
+ "ement</w>": 811,
301
+ "emp": 779,
302
+ "en": 308,
303
+ "en</w>": 343,
304
+ "ence</w>": 563,
305
+ "end</w>": 733,
306
+ "eng": 633,
307
+ "ent": 376,
308
+ "ent</w>": 381,
309
+ "enti": 829,
310
+ "ents</w>": 555,
311
+ "ep": 530,
312
+ "episo": 931,
313
+ "episode</w>": 1016,
314
+ "er": 303,
315
+ "er</w>": 312,
316
+ "ere</w>": 385,
317
+ "ern</w>": 691,
318
+ "ers</w>": 398,
319
+ "es": 435,
320
+ "es</w>": 323,
321
+ "est": 674,
322
+ "est</w>": 836,
323
+ "et</w>": 535,
324
+ "eu": 896,
325
+ "ev": 539,
326
+ "ever</w>": 754,
327
+ "ex": 417,
328
+ "f": 45,
329
+ "f</w>": 187,
330
+ "fa": 630,
331
+ "fac": 893,
332
+ "fe": 484,
333
+ "fe</w>": 851,
334
+ "fer": 610,
335
+ "fi": 350,
336
+ "fiel": 996,
337
+ "fil": 672,
338
+ "film</w>": 858,
339
+ "fin": 553,
340
+ "fir": 470,
341
+ "first</w>": 509,
342
+ "fo": 954,
343
+ "fol": 692,
344
+ "follow": 786,
345
+ "following</w>": 978,
346
+ "for": 396,
347
+ "for</w>": 366,
348
+ "fore</w>": 782,
349
+ "form": 645,
350
+ "found</w>": 1015,
351
+ "four</w>": 938,
352
+ "fre": 1012,
353
+ "fro": 401,
354
+ "from</w>": 403,
355
+ "ft</w>": 707,
356
+ "fu": 665,
357
+ "g": 46,
358
+ "g</w>": 175,
359
+ "ga": 631,
360
+ "game</w>": 697,
361
+ "gan</w>": 835,
362
+ "ge": 574,
363
+ "ge</w>": 482,
364
+ "gen": 625,
365
+ "gener": 824,
366
+ "ger": 913,
367
+ "gh": 400,
368
+ "gh</w>": 481,
369
+ "ght</w>": 493,
370
+ "gi": 516,
371
+ "gin": 771,
372
+ "go": 494,
373
+ "gover": 987,
374
+ "gr": 955,
375
+ "gra": 611,
376
+ "gre": 612,
377
+ "gro": 785,
378
+ "gs</w>": 975,
379
+ "gu": 426,
380
+ "h": 47,
381
+ "h</w>": 168,
382
+ "ha": 367,
383
+ "had</w>": 459,
384
+ "har": 971,
385
+ "has</w>": 556,
386
+ "have</w>": 547,
387
+ "he</w>": 410,
388
+ "hea": 720,
389
+ "hel": 750,
390
+ "her": 830,
391
+ "her</w>": 522,
392
+ "hi": 325,
393
+ "high": 917,
394
+ "him</w>": 739,
395
+ "his</w>": 406,
396
+ "histor": 850,
397
+ "ho": 577,
398
+ "hou": 940,
399
+ "how": 804,
400
+ "however</w>": 867,
401
+ "hu": 993,
402
+ "i": 48,
403
+ "i</w>": 185,
404
+ "ic": 409,
405
+ "ic</w>": 783,
406
+ "ical</w>": 713,
407
+ "id</w>": 682,
408
+ "ies</w>": 508,
409
+ "il": 379,
410
+ "im": 465,
411
+ "in": 300,
412
+ "in</w>": 314,
413
+ "inc": 491,
414
+ "inclu": 614,
415
+ "including</w>": 926,
416
+ "indi": 847,
417
+ "ine</w>": 490,
418
+ "ined</w>": 694,
419
+ "inf": 901,
420
+ "ing": 453,
421
+ "ing</w>": 315,
422
+ "ings</w>": 670,
423
+ "ins</w>": 680,
424
+ "inst</w>": 927,
425
+ "inter": 609,
426
+ "into</w>": 649,
427
+ "inv": 943,
428
+ "ir": 428,
429
+ "ir</w>": 456,
430
+ "is": 395,
431
+ "is</w>": 349,
432
+ "iso": 890,
433
+ "it": 319,
434
+ "it</w>": 390,
435
+ "ite</w>": 624,
436
+ "ited</w>": 666,
437
+ "ith</w>": 370,
438
+ "its</w>": 480,
439
+ "ity</w>": 443,
440
+ "j": 49,
441
+ "j</w>": 203,
442
+ "jo": 585,
443
+ "joh": 941,
444
+ "ju": 619,
445
+ "k": 50,
446
+ "k</w>": 183,
447
+ "ke": 661,
448
+ "ke</w>": 505,
449
+ "ked</w>": 653,
450
+ "king</w>": 564,
451
+ "kno": 705,
452
+ "known</w>": 879,
453
+ "ks</w>": 559,
454
+ "l": 51,
455
+ "l</w>": 172,
456
+ "la": 348,
457
+ "lan": 606,
458
+ "land</w>": 561,
459
+ "lar": 646,
460
+ "lar</w>": 698,
461
+ "later</w>": 725,
462
+ "ld</w>": 462,
463
+ "le": 382,
464
+ "le</w>": 357,
465
+ "lea": 466,
466
+ "led</w>": 520,
467
+ "les</w>": 658,
468
+ "ley</w>": 1020,
469
+ "li": 336,
470
+ "like</w>": 936,
471
+ "lin": 864,
472
+ "line</w>": 841,
473
+ "ling</w>": 869,
474
+ "lion</w>": 979,
475
+ "ll</w>": 876,
476
+ "lo": 358,
477
+ "loc": 729,
478
+ "low": 567,
479
+ "low</w>": 887,
480
+ "ls</w>": 929,
481
+ "lu": 432,
482
+ "ly</w>": 342,
483
+ "m": 52,
484
+ "m</w>": 173,
485
+ "ma": 365,
486
+ "made</w>": 794,
487
+ "man": 444,
488
+ "man</w>": 603,
489
+ "many</w>": 801,
490
+ "mar": 474,
491
+ "mat": 823,
492
+ "may</w>": 734,
493
+ "me": 475,
494
+ "me</w>": 967,
495
+ "ment</w>": 572,
496
+ "mi": 492,
497
+ "mil": 615,
498
+ "min": 568,
499
+ "mis": 861,
500
+ "mo": 399,
501
+ "mon": 597,
502
+ "mor": 1018,
503
+ "more</w>": 637,
504
+ "most</w>": 652,
505
+ "mp": 985,
506
+ "ms</w>": 916,
507
+ "mu": 519,
508
+ "musi": 808,
509
+ "music</w>": 961,
510
+ "n": 53,
511
+ "n</w>": 174,
512
+ "na": 517,
513
+ "nam": 810,
514
+ "national</w>": 849,
515
+ "ne": 393,
516
+ "ned</w>": 620,
517
+ "new</w>": 598,
518
+ "ni": 546,
519
+ "ning</w>": 702,
520
+ "no": 387,
521
+ "no</w>": 759,
522
+ "nor": 576,
523
+ "north</w>": 891,
524
+ "not</w>": 515,
525
+ "nov": 945,
526
+ "ns</w>": 981,
527
+ "num": 763,
528
+ "number</w>": 924,
529
+ "o": 54,
530
+ "o</w>": 182,
531
+ "ob": 909,
532
+ "oc": 511,
533
+ "od</w>": 825,
534
+ "of": 513,
535
+ "of</w>": 309,
536
+ "offi": 914,
537
+ "og": 738,
538
+ "ok</w>": 749,
539
+ "ol": 351,
540
+ "old</w>": 718,
541
+ "olog": 903,
542
+ "om": 339,
543
+ "ome</w>": 571,
544
+ "on": 307,
545
+ "on</w>": 313,
546
+ "ond</w>": 747,
547
+ "one</w>": 452,
548
+ "ong</w>": 472,
549
+ "only</w>": 683,
550
+ "ons</w>": 601,
551
+ "oo": 552,
552
+ "op": 405,
553
+ "oper": 912,
554
+ "or": 310,
555
+ "or</w>": 334,
556
+ "ore</w>": 529,
557
+ "ori": 591,
558
+ "origin": 960,
559
+ "ors</w>": 962,
560
+ "other</w>": 540,
561
+ "ou": 324,
562
+ "ould</w>": 531,
563
+ "oun": 411,
564
+ "ound</w>": 635,
565
+ "our": 548,
566
+ "our</w>": 608,
567
+ "ous</w>": 578,
568
+ "out</w>": 478,
569
+ "over": 831,
570
+ "over</w>": 669,
571
+ "ow": 416,
572
+ "own</w>": 617,
573
+ "p": 55,
574
+ "p</w>": 189,
575
+ "pa": 593,
576
+ "par": 427,
577
+ "part</w>": 892,
578
+ "pas": 999,
579
+ "pe": 514,
580
+ "pen": 859,
581
+ "peop": 1013,
582
+ "per": 437,
583
+ "peri": 895,
584
+ "ph": 769,
585
+ "phi": 1007,
586
+ "pi": 607,
587
+ "pl": 488,
588
+ "pla": 506,
589
+ "play": 632,
590
+ "po": 377,
591
+ "poin": 856,
592
+ "poli": 902,
593
+ "por": 473,
594
+ "port</w>": 843,
595
+ "posi": 905,
596
+ "pp": 770,
597
+ "pr": 888,
598
+ "pre": 424,
599
+ "pres": 618,
600
+ "pri": 667,
601
+ "pro": 386,
602
+ "produc": 774,
603
+ "ps</w>": 703,
604
+ "pu": 485,
605
+ "publi": 728,
606
+ "q": 56,
607
+ "q</w>": 210,
608
+ "qu": 431,
609
+ "r": 57,
610
+ "r</w>": 176,
611
+ "ra": 355,
612
+ "ran": 523,
613
+ "re": 304,
614
+ "re</w>": 671,
615
+ "rea": 746,
616
+ "rec": 436,
617
+ "recor": 708,
618
+ "red</w>": 764,
619
+ "ree</w>": 638,
620
+ "relea": 795,
621
+ "rema": 980,
622
+ "ren": 629,
623
+ "res</w>": 613,
624
+ "ri": 329,
625
+ "rit": 599,
626
+ "ro": 317,
627
+ "ron": 951,
628
+ "rou": 498,
629
+ "ru": 419,
630
+ "ry</w>": 885,
631
+ "s": 58,
632
+ "s</w>": 170,
633
+ "sa": 510,
634
+ "sc": 414,
635
+ "sch": 775,
636
+ "scri": 745,
637
+ "se": 380,
638
+ "se</w>": 373,
639
+ "sea": 677,
640
+ "season</w>": 781,
641
+ "sec": 554,
642
+ "second</w>": 837,
643
+ "sed</w>": 446,
644
+ "sel": 699,
645
+ "ser": 486,
646
+ "series</w>": 826,
647
+ "ses</w>": 647,
648
+ "set</w>": 1019,
649
+ "sever": 877,
650
+ "several</w>": 920,
651
+ "sh": 359,
652
+ "sh</w>": 541,
653
+ "she</w>": 594,
654
+ "shed</w>": 719,
655
+ "shi": 533,
656
+ "ship</w>": 748,
657
+ "sho": 828,
658
+ "si": 327,
659
+ "side</w>": 839,
660
+ "sig": 627,
661
+ "sin": 791,
662
+ "sing": 857,
663
+ "sing</w>": 679,
664
+ "sion</w>": 504,
665
+ "sk": 889,
666
+ "sm": 796,
667
+ "so": 602,
668
+ "so</w>": 477,
669
+ "soci": 934,
670
+ "sol": 977,
671
+ "som": 984,
672
+ "some</w>": 757,
673
+ "son": 800,
674
+ "son</w>": 501,
675
+ "song</w>": 751,
676
+ "sou": 641,
677
+ "south</w>": 884,
678
+ "sp": 384,
679
+ "spec": 581,
680
+ "ss</w>": 489,
681
+ "ssi": 732,
682
+ "st": 316,
683
+ "st</w>": 335,
684
+ "sta": 690,
685
+ "star": 744,
686
+ "state</w>": 898,
687
+ "ste": 724,
688
+ "sted</w>": 853,
689
+ "ster</w>": 883,
690
+ "stern</w>": 994,
691
+ "sti": 643,
692
+ "stor": 537,
693
+ "str": 589,
694
+ "stri": 863,
695
+ "stru": 695,
696
+ "struc": 772,
697
+ "sts</w>": 766,
698
+ "stu": 807,
699
+ "su": 368,
700
+ "sub": 787,
701
+ "suc": 947,
702
+ "such</w>": 812,
703
+ "sul": 930,
704
+ "sup": 860,
705
+ "sur": 642,
706
+ "sy": 673,
707
+ "t": 59,
708
+ "t</w>": 180,
709
+ "ta": 438,
710
+ "tain</w>": 1010,
711
+ "te": 544,
712
+ "te</w>": 550,
713
+ "ted</w>": 402,
714
+ "tel": 937,
715
+ "ten": 648,
716
+ "ten</w>": 798,
717
+ "ter": 433,
718
+ "ter</w>": 420,
719
+ "ters</w>": 838,
720
+ "th": 298,
721
+ "th</w>": 391,
722
+ "than</w>": 723,
723
+ "that</w>": 362,
724
+ "the": 375,
725
+ "the</w>": 299,
726
+ "their</w>": 507,
727
+ "them</w>": 819,
728
+ "then</w>": 875,
729
+ "ther</w>": 430,
730
+ "there</w>": 760,
731
+ "these</w>": 848,
732
+ "they</w>": 525,
733
+ "this</w>": 495,
734
+ "tho": 919,
735
+ "thou": 636,
736
+ "though</w>": 741,
737
+ "three</w>": 716,
738
+ "throu": 731,
739
+ "through</w>": 878,
740
+ "ti": 306,
741
+ "tic": 536,
742
+ "tic</w>": 681,
743
+ "ties</w>": 704,
744
+ "tim": 469,
745
+ "time</w>": 600,
746
+ "ting</w>": 542,
747
+ "tion": 404,
748
+ "tion</w>": 347,
749
+ "tional</w>": 634,
750
+ "tions</w>": 479,
751
+ "tish</w>": 982,
752
+ "tiv": 946,
753
+ "tive</w>": 596,
754
+ "tle</w>": 986,
755
+ "tly</w>": 894,
756
+ "to": 468,
757
+ "to</w>": 320,
758
+ "ton</w>": 818,
759
+ "tor": 780,
760
+ "tor</w>": 950,
761
+ "tow": 976,
762
+ "tr": 460,
763
+ "tra": 592,
764
+ "trac": 1022,
765
+ "tran": 840,
766
+ "tre": 865,
767
+ "tri": 685,
768
+ "tro": 715,
769
+ "try</w>": 1011,
770
+ "ts</w>": 413,
771
+ "tu": 650,
772
+ "tur": 496,
773
+ "ture</w>": 842,
774
+ "tw": 434,
775
+ "two</w>": 545,
776
+ "ty</w>": 584,
777
+ "u": 60,
778
+ "u</w>": 186,
779
+ "ul": 461,
780
+ "um": 500,
781
+ "um</w>": 743,
782
+ "un": 346,
783
+ "und</w>": 952,
784
+ "under</w>": 944,
785
+ "uni": 696,
786
+ "united</w>": 942,
787
+ "up</w>": 657,
788
+ "ur": 337,
789
+ "ure</w>": 777,
790
+ "uring</w>": 586,
791
+ "ury</w>": 806,
792
+ "us</w>": 534,
793
+ "use</w>": 991,
794
+ "used</w>": 701,
795
+ "v": 61,
796
+ "v</w>": 196,
797
+ "ve": 910,
798
+ "ve</w>": 371,
799
+ "ved</w>": 503,
800
+ "vel": 688,
801
+ "ven</w>": 911,
802
+ "ver": 392,
803
+ "ver</w>": 440,
804
+ "very</w>": 970,
805
+ "ves</w>": 737,
806
+ "vi": 356,
807
+ "vie": 710,
808
+ "ving</w>": 686,
809
+ "vision</w>": 873,
810
+ "vo": 852,
811
+ "w": 62,
812
+ "w</w>": 194,
813
+ "wa": 464,
814
+ "war": 768,
815
+ "war</w>": 866,
816
+ "ward</w>": 965,
817
+ "was</w>": 341,
818
+ "way</w>": 654,
819
+ "we": 502,
820
+ "well</w>": 833,
821
+ "were</w>": 423,
822
+ "west</w>": 959,
823
+ "wh": 389,
824
+ "when</w>": 626,
825
+ "where</w>": 792,
826
+ "whi": 418,
827
+ "which</w>": 463,
828
+ "while</w>": 668,
829
+ "who</w>": 579,
830
+ "wi": 538,
831
+ "wil": 712,
832
+ "win": 907,
833
+ "with": 799,
834
+ "with</w>": 372,
835
+ "wn</w>": 676,
836
+ "wor": 471,
837
+ "work</w>": 966,
838
+ "world</w>": 834,
839
+ "would</w>": 662,
840
+ "writ": 722,
841
+ "ws</w>": 953,
842
+ "x": 63,
843
+ "x</w>": 200,
844
+ "y": 64,
845
+ "y</w>": 169,
846
+ "year</w>": 789,
847
+ "years</w>": 845,
848
+ "ying</w>": 868,
849
+ "yo": 1008,
850
+ "yp": 870,
851
+ "ys</w>": 711,
852
+ "z": 65,
853
+ "z</w>": 193,
854
+ "zed</w>": 973,
855
+ "|": 66,
856
+ "|</w>": 268,
857
+ "}": 67,
858
+ "}</w>": 227,
859
+ "~": 68,
860
+ "~</w>": 287,
861
+ "¡": 69,
862
+ "¡</w>": 206,
863
+ "¢": 70,
864
+ "¢</w>": 221,
865
+ "£": 71,
866
+ "£</w>": 243,
867
+ "¤": 72,
868
+ "¤</w>": 192,
869
+ "¥": 73,
870
+ "¥</w>": 212,
871
+ "¦": 74,
872
+ "¦</w>": 215,
873
+ "§": 75,
874
+ "§</w>": 226,
875
+ "¨": 76,
876
+ "¨</w>": 177,
877
+ "©": 77,
878
+ "©</w>": 188,
879
+ "ª": 78,
880
+ "ª</w>": 202,
881
+ "«": 79,
882
+ "«</w>": 205,
883
+ "¬": 80,
884
+ "¬</w>": 232,
885
+ "®": 81,
886
+ "®</w>": 248,
887
+ "¯": 82,
888
+ "¯</w>": 190,
889
+ "°": 83,
890
+ "°</w>": 201,
891
+ "±": 84,
892
+ "±</w>": 179,
893
+ "²": 85,
894
+ "²</w>": 246,
895
+ "³": 86,
896
+ "³</w>": 235,
897
+ "´": 87,
898
+ "´</w>": 257,
899
+ "µ": 88,
900
+ "µ</w>": 225,
901
+ "¶": 89,
902
+ "¶</w>": 214,
903
+ "·": 90,
904
+ "·</w>": 211,
905
+ "¸": 91,
906
+ "¸</w>": 217,
907
+ "¹": 92,
908
+ "¹</w>": 207,
909
+ "º": 93,
910
+ "º</w>": 204,
911
+ "»": 94,
912
+ "»</w>": 198,
913
+ "¼": 95,
914
+ "¼</w>": 224,
915
+ "½": 96,
916
+ "½</w>": 271,
917
+ "¾": 97,
918
+ "¾</w>": 191,
919
+ "¿": 98,
920
+ "¿</w>": 249,
921
+ "Â": 99,
922
+ "Ã": 100,
923
+ "Ä": 101,
924
+ "Å": 102,
925
+ "Æ": 103,
926
+ "Ç": 104,
927
+ "È": 105,
928
+ "É": 106,
929
+ "Ê": 107,
930
+ "Ë": 108,
931
+ "Ì": 109,
932
+ "Í": 110,
933
+ "Î": 111,
934
+ "Ï": 112,
935
+ "Ð": 113,
936
+ "Ñ": 114,
937
+ "Ö": 115,
938
+ "×": 116,
939
+ "Ø": 117,
940
+ "Ù": 118,
941
+ "Ü": 119,
942
+ "à": 120,
943
+ "á": 121,
944
+ "â": 122,
945
+ "âĢ": 458,
946
+ "âĢĵ</w>": 521,
947
+ "âĢĶ</w>": 939,
948
+ "ã": 123,
949
+ "ä": 124,
950
+ "å": 125,
951
+ "æ": 126,
952
+ "ç": 127,
953
+ "è": 128,
954
+ "é": 129,
955
+ "ë": 130,
956
+ "ì": 131,
957
+ "ï": 132,
958
+ "Ģ": 133,
959
+ "Ģ</w>": 252,
960
+ "ģ": 134,
961
+ "ģ</w>": 260,
962
+ "Ĥ": 135,
963
+ "Ĥ</w>": 195,
964
+ "ĥ": 136,
965
+ "ĥ</w>": 208,
966
+ "Ħ": 137,
967
+ "Ħ</w>": 239,
968
+ "ħ": 138,
969
+ "ħ</w>": 270,
970
+ "Ĩ": 139,
971
+ "Ĩ</w>": 238,
972
+ "ĩ": 140,
973
+ "ĩ</w>": 197,
974
+ "Ī": 141,
975
+ "Ī</w>": 256,
976
+ "ī": 142,
977
+ "ī</w>": 253,
978
+ "Ĭ": 143,
979
+ "Ĭ</w>": 209,
980
+ "ĭ": 144,
981
+ "ĭ</w>": 259,
982
+ "Į": 145,
983
+ "Į</w>": 219,
984
+ "į": 146,
985
+ "į</w>": 181,
986
+ "İ": 147,
987
+ "İ</w>": 261,
988
+ "ı": 148,
989
+ "ı</w>": 222,
990
+ "IJ": 149,
991
+ "IJ</w>": 216,
992
+ "ij": 150,
993
+ "ij</w>": 262,
994
+ "Ĵ": 151,
995
+ "Ĵ</w>": 234,
996
+ "ĵ": 152,
997
+ "ĵ</w>": 263,
998
+ "Ķ": 153,
999
+ "Ķ</w>": 229,
1000
+ "ķ": 154,
1001
+ "ķ</w>": 242,
1002
+ "ĸ": 155,
1003
+ "ĸ</w>": 266,
1004
+ "Ĺ": 156,
1005
+ "Ĺ</w>": 230,
1006
+ "ĺ": 157,
1007
+ "ĺ</w>": 237,
1008
+ "Ļ": 158,
1009
+ "Ļ</w>": 233,
1010
+ "ļ": 159,
1011
+ "ļ</w>": 250,
1012
+ "Ľ": 160,
1013
+ "Ľ</w>": 254,
1014
+ "ľ": 161,
1015
+ "ľ</w>": 251,
1016
+ "Ŀ": 162,
1017
+ "Ŀ</w>": 245,
1018
+ "ŀ": 163,
1019
+ "ŀ</w>": 244,
1020
+ "Ł": 164,
1021
+ "Ł</w>": 220,
1022
+ "ł": 165,
1023
+ "ł</w>": 241,
1024
+ "Ń": 166,
1025
+ "Ń</w>": 213
1026
+ }