ammar committed on
Commit
10af1cd
•
1 Parent(s): 132c96f

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +5 -1
  2. tokenizer_config.json +12 -1
special_tokens_map.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
3
  "▁<PRE>",
4
  "▁<MID>",
5
  "▁<SUF>",
@@ -19,7 +23,7 @@
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
- "pad_token": "<unk>",
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ "▁<PRE>",
4
+ "▁<MID>",
5
+ "▁<SUF>",
6
+ "▁<EOT>",
7
  "▁<PRE>",
8
  "▁<MID>",
9
  "▁<SUF>",
 
23
  "rstrip": false,
24
  "single_word": false
25
  },
26
+ "pad_token": "</s>",
27
  "unk_token": {
28
  "content": "<unk>",
29
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -60,6 +60,10 @@
60
  }
61
  },
62
  "additional_special_tokens": [
 
 
 
 
63
  "▁<PRE>",
64
  "▁<MID>",
65
  "▁<SUF>",
@@ -71,13 +75,20 @@
71
  "eot_token": "▁<EOT>",
72
  "fill_token": "<FILL_ME>",
73
  "legacy": null,
 
74
  "middle_token": "▁<MID>",
75
  "model_max_length": 1000000000000000019884624838656,
76
- "pad_token": "<unk>",
 
 
 
77
  "prefix_token": "▁<PRE>",
78
  "sp_model_kwargs": {},
 
79
  "suffix_token": "▁<SUF>",
80
  "tokenizer_class": "CodeLlamaTokenizer",
 
 
81
  "unk_token": "<unk>",
82
  "use_default_system_prompt": false
83
  }
 
60
  }
61
  },
62
  "additional_special_tokens": [
63
+ "▁<PRE>",
64
+ "▁<MID>",
65
+ "▁<SUF>",
66
+ "▁<EOT>",
67
  "▁<PRE>",
68
  "▁<MID>",
69
  "▁<SUF>",
 
75
  "eot_token": "▁<EOT>",
76
  "fill_token": "<FILL_ME>",
77
  "legacy": null,
78
+ "max_length": 512,
79
  "middle_token": "▁<MID>",
80
  "model_max_length": 1000000000000000019884624838656,
81
+ "pad_to_multiple_of": null,
82
+ "pad_token": "</s>",
83
+ "pad_token_type_id": 0,
84
+ "padding_side": "right",
85
  "prefix_token": "▁<PRE>",
86
  "sp_model_kwargs": {},
87
+ "stride": 0,
88
  "suffix_token": "▁<SUF>",
89
  "tokenizer_class": "CodeLlamaTokenizer",
90
+ "truncation_side": "right",
91
+ "truncation_strategy": "longest_first",
92
  "unk_token": "<unk>",
93
  "use_default_system_prompt": false
94
  }