JaphetHernandez committed on
Commit
075f4a9
·
verified ·
1 Parent(s): a7d2c50

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +5 -1
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:510d12ec255f4cb0304aa5428d699c354c1a49696b427a2748a7b03bb7bbb575
3
- size 17210296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476b711d0ef387ddcdaae99abb9686498fdff008d33308d865970b0815c6c4d0
3
+ size 17210394
tokenizer_config.json CHANGED
@@ -2073,11 +2073,15 @@
2073
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2074
  "clean_up_tokenization_spaces": true,
2075
  "eos_token": "<|im_end|>",
 
2076
  "model_input_names": [
2077
  "input_ids",
2078
  "attention_mask"
2079
  ],
2080
  "model_max_length": 131072,
2081
  "pad_token": "<|im_end|>",
2082
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
2083
  }
 
2073
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2074
  "clean_up_tokenization_spaces": true,
2075
  "eos_token": "<|im_end|>",
2076
+ "max_length": 512,
2077
  "model_input_names": [
2078
  "input_ids",
2079
  "attention_mask"
2080
  ],
2081
  "model_max_length": 131072,
2082
  "pad_token": "<|im_end|>",
2083
+ "stride": 0,
2084
+ "tokenizer_class": "PreTrainedTokenizerFast",
2085
+ "truncation_side": "right",
2086
+ "truncation_strategy": "longest_first"
2087
  }