Transformers
PyTorch
code
custom_code
Inference Endpoints
codesage committed on
Commit
a4a3da0
1 Parent(s): 9eb00ca

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +32 -0
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<|endoftext|>",
5
+ "<fim_prefix>",
6
+ "<fim_middle>",
7
+ "<fim_suffix>",
8
+ "<fim_pad>",
9
+ "<filename>",
10
+ "<gh_stars>",
11
+ "<issue_start>",
12
+ "<issue_comment>",
13
+ "<issue_closed>",
14
+ "<jupyter_start>",
15
+ "<jupyter_text>",
16
+ "<jupyter_code>",
17
+ "<jupyter_output>",
18
+ "<empty_output>",
19
+ "<commit_before>",
20
+ "<commit_msg>",
21
+ "<commit_after>",
22
+ "<reponame>"
23
+ ],
24
+ "bos_token": "<|endoftext|>",
25
+ "clean_up_tokenization_spaces": true,
26
+ "eos_token": "<|endoftext|>",
27
+ "add_eos_token": true,
28
+ "model_max_length": 1000000000000000019884624838656,
29
+ "tokenizer_class": "CodeSageTokenizer",
30
+ "unk_token": "<|endoftext|>",
31
+ "vocab_size": 49152
32
+ }