Samuael committed on
Commit
e9caaab
•
1 Parent(s): e3b2b9b

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -4
  2. tokenizer_config.json +12 -12
  3. vocab.json +36 -34
added_tokens.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "</s>": 38,
3
- "<s>": 37,
4
- "[PAD]": 40,
5
- "[UNK]": 39
6
  }
 
1
  {
2
+ "</s>": 40,
3
+ "<s>": 39
 
 
4
  }
tokenizer_config.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "added_tokens_decoder": {
3
- "37": {
4
- "content": "<s>",
5
- "lstrip": false,
6
  "normalized": false,
7
- "rstrip": false,
8
  "single_word": false,
9
- "special": true
10
  },
11
- "38": {
12
- "content": "</s>",
13
- "lstrip": false,
14
  "normalized": false,
15
- "rstrip": false,
16
  "single_word": false,
17
- "special": true
18
  },
19
  "39": {
20
- "content": "[UNK]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -25,7 +25,7 @@
25
  "special": true
26
  },
27
  "40": {
28
- "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": true,
6
  "normalized": false,
7
+ "rstrip": true,
8
  "single_word": false,
9
+ "special": false
10
  },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": true,
14
  "normalized": false,
15
+ "rstrip": true,
16
  "single_word": false,
17
+ "special": false
18
  },
19
  "39": {
20
+ "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
25
  "special": true
26
  },
27
  "40": {
28
+ "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
vocab.json CHANGED
@@ -1,39 +1,41 @@
1
  {
2
- " ": 16,
3
- "ል": 19,
4
- "ሕ": 9,
5
- "ም": 33,
6
- "ር": 24,
7
- "ስ": 23,
8
- "ሽ": 26,
9
- "ቅ": 21,
10
- "ቕ": 14,
11
- "ብ": 25,
12
- "ቭ": 6,
13
- "ት": 17,
14
- "ች": 2,
15
- "ን": 20,
16
- "ኝ": 34,
17
- "ኡ": 31,
18
- "ኢ": 12,
19
- "ኣ": 29,
20
- "ኤ": 5,
 
 
21
  "እ": 22,
22
- "ኦ": 27,
23
- "ኧ": 18,
24
- "ክ": 30,
25
- "ኽ": 28,
26
- "ው": 10,
27
  "ዝ": 7,
28
- "ዥ": 1,
29
- "ይ": 8,
30
- "ድ": 15,
31
- "ጅ": 35,
32
- "ግ": 36,
33
- "ጥ": 0,
34
- "ጭ": 13,
35
  "ጵ": 11,
36
- "ጽ": 3,
37
- "ፍ": 32,
38
- "ፕ": 4
39
  }
 
1
  {
2
+ "[PAD]": 0,
3
+ "[UNK]": 1,
4
+ "|": 2,
5
+ "ል": 37,
6
+ "ሕ": 30,
7
+ "ም": 13,
8
+ "ር": 8,
9
+ "ስ": 17,
10
+ "ሽ": 3,
11
+ "ቅ": 26,
12
+ "ቕ": 9,
13
+ "ብ": 29,
14
+ "ቭ": 35,
15
+ "ት": 34,
16
+ "ች": 20,
17
+ "ን": 36,
18
+ "ኝ": 27,
19
+ "ኡ": 21,
20
+ "ኢ": 31,
21
+ "ኣ": 14,
22
+ "ኤ": 32,
23
  "እ": 22,
24
+ "ኦ": 12,
25
+ "ኧ": 15,
26
+ "ክ": 28,
27
+ "ኽ": 6,
28
+ "ው": 25,
29
  "ዝ": 7,
30
+ "ዥ": 4,
31
+ "ይ": 23,
32
+ "ድ": 5,
33
+ "ጅ": 18,
34
+ "ግ": 38,
35
+ "ጥ": 10,
36
+ "ጭ": 33,
37
  "ጵ": 11,
38
+ "ጽ": 16,
39
+ "ፍ": 19,
40
+ "ፕ": 24
41
  }