Guilherme34 committed on
Commit
9423a94
1 Parent(s): e9d5a9c

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -16,7 +16,13 @@
16
  "rstrip": false,
17
  "single_word": false
18
  },
19
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
20
  "unk_token": {
21
  "content": "<unk>",
22
  "lstrip": false,
 
16
  "rstrip": false,
17
  "single_word": false
18
  },
19
+ "pad_token": {
20
+ "content": "<|endoftext|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
  "unk_token": {
27
  "content": "<unk>",
28
  "lstrip": false,
tokenizer.json CHANGED
@@ -404,12 +404,6 @@
404
  "id": "A",
405
  "type_id": 0
406
  }
407
- },
408
- {
409
- "SpecialToken": {
410
- "id": "<|endoftext|>",
411
- "type_id": 0
412
- }
413
  }
414
  ],
415
  "pair": [
@@ -425,12 +419,6 @@
425
  "type_id": 0
426
  }
427
  },
428
- {
429
- "SpecialToken": {
430
- "id": "<|endoftext|>",
431
- "type_id": 0
432
- }
433
- },
434
  {
435
  "SpecialToken": {
436
  "id": "<s>",
@@ -442,12 +430,6 @@
442
  "id": "B",
443
  "type_id": 1
444
  }
445
- },
446
- {
447
- "SpecialToken": {
448
- "id": "<|endoftext|>",
449
- "type_id": 1
450
- }
451
  }
452
  ],
453
  "special_tokens": {
@@ -459,15 +441,6 @@
459
  "tokens": [
460
  "<s>"
461
  ]
462
- },
463
- "<|endoftext|>": {
464
- "id": "<|endoftext|>",
465
- "ids": [
466
- 32000
467
- ],
468
- "tokens": [
469
- "<|endoftext|>"
470
- ]
471
  }
472
  }
473
  },
 
404
  "id": "A",
405
  "type_id": 0
406
  }
 
 
 
 
 
 
407
  }
408
  ],
409
  "pair": [
 
419
  "type_id": 0
420
  }
421
  },
 
 
 
 
 
 
422
  {
423
  "SpecialToken": {
424
  "id": "<s>",
 
430
  "id": "B",
431
  "type_id": 1
432
  }
 
 
 
 
 
 
433
  }
434
  ],
435
  "special_tokens": {
 
441
  "tokens": [
442
  "<s>"
443
  ]
 
 
 
 
 
 
 
 
 
444
  }
445
  }
446
  },
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": true,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -338,6 +338,7 @@
338
  "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
339
  "clean_up_tokenization_spaces": false,
340
  "eos_token": "<|endoftext|>",
 
341
  "model_max_length": 131072,
342
  "pad_token": "<|endoftext|>",
343
  "padding_side": "left",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
338
  "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
339
  "clean_up_tokenization_spaces": false,
340
  "eos_token": "<|endoftext|>",
341
+ "legacy": false,
342
  "model_max_length": 131072,
343
  "pad_token": "<|endoftext|>",
344
  "padding_side": "left",