Teja-Gollapudi committed on
Commit
5c1213e
1 Parent(s): 16df352

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -1
  2. special_tokens_map.json +3 -3
  3. tokenizer.json +5 -5
added_tokens.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "[PAD]": 32000
3
  }
 
1
  {
2
+ "\n": 32000
3
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "bos_token": "</s>",
3
  "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "</s>"
6
  }
 
1
  {
2
+ "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "<unk>",
5
+ "unk_token": "<unk>"
6
  }
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": true,
13
  "special": true
14
  },
15
  {
@@ -18,7 +18,7 @@
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": true,
22
  "special": true
23
  },
24
  {
@@ -32,12 +32,12 @@
32
  },
33
  {
34
  "id": 32000,
35
- "content": "[PAD]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  }
42
  ],
43
  "normalizer": {
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
  "special": true
14
  },
15
  {
 
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": false,
22
  "special": true
23
  },
24
  {
 
32
  },
33
  {
34
  "id": 32000,
35
+ "content": "\n",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": true,
40
+ "special": false
41
  }
42
  ],
43
  "normalizer": {