diwank committed on
Commit
e644401
1 Parent(s): 860185b

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +3 -3
  2. tokenizer_config.json +6 -4
special_tokens_map.json CHANGED
@@ -60,14 +60,14 @@
60
  "bos_token": {
61
  "content": "<s>",
62
  "lstrip": false,
63
- "normalized": false,
64
  "rstrip": false,
65
  "single_word": false
66
  },
67
  "eos_token": {
68
  "content": "</s>",
69
  "lstrip": false,
70
- "normalized": false,
71
  "rstrip": false,
72
  "single_word": false
73
  },
@@ -75,7 +75,7 @@
75
  "unk_token": {
76
  "content": "<unk>",
77
  "lstrip": false,
78
- "normalized": false,
79
  "rstrip": false,
80
  "single_word": false
81
  }
 
60
  "bos_token": {
61
  "content": "<s>",
62
  "lstrip": false,
63
+ "normalized": true,
64
  "rstrip": false,
65
  "single_word": false
66
  },
67
  "eos_token": {
68
  "content": "</s>",
69
  "lstrip": false,
70
+ "normalized": true,
71
  "rstrip": false,
72
  "single_word": false
73
  },
 
75
  "unk_token": {
76
  "content": "<unk>",
77
  "lstrip": false,
78
+ "normalized": true,
79
  "rstrip": false,
80
  "single_word": false
81
  }
tokenizer_config.json CHANGED
@@ -1,9 +1,11 @@
1
  {
 
 
2
  "bos_token": {
3
  "__type": "AddedToken",
4
  "content": "<s>",
5
  "lstrip": false,
6
- "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
@@ -12,12 +14,12 @@
12
  "__type": "AddedToken",
13
  "content": "</s>",
14
  "lstrip": false,
15
- "normalized": false,
16
  "rstrip": false,
17
  "single_word": false
18
  },
19
  "legacy": false,
20
- "model_max_length": 1000000000000000019884624838656,
21
  "pad_token": null,
22
  "sp_model_kwargs": {},
23
  "tokenizer_class": "LlamaTokenizer",
@@ -25,7 +27,7 @@
25
  "__type": "AddedToken",
26
  "content": "<unk>",
27
  "lstrip": false,
28
- "normalized": false,
29
  "rstrip": false,
30
  "single_word": false
31
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<s>",
7
  "lstrip": false,
8
+ "normalized": true,
9
  "rstrip": false,
10
  "single_word": false
11
  },
 
14
  "__type": "AddedToken",
15
  "content": "</s>",
16
  "lstrip": false,
17
+ "normalized": true,
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
  "legacy": false,
22
+ "model_max_length": 2048,
23
  "pad_token": null,
24
  "sp_model_kwargs": {},
25
  "tokenizer_class": "LlamaTokenizer",
 
27
  "__type": "AddedToken",
28
  "content": "<unk>",
29
  "lstrip": false,
30
+ "normalized": true,
31
  "rstrip": false,
32
  "single_word": false
33
  }