anyiwang committed on
Commit
3e10211
·
verified ·
1 Parent(s): 94c2e50

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +33 -1
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -52,6 +57,12 @@
52
  "id": "A",
53
  "type_id": 0
54
  }
 
 
 
 
 
 
55
  }
56
  ],
57
  "pair": [
@@ -67,6 +78,12 @@
67
  "type_id": 0
68
  }
69
  },
 
 
 
 
 
 
70
  {
71
  "SpecialToken": {
72
  "id": "<s>",
@@ -78,9 +95,24 @@
78
  "id": "B",
79
  "type_id": 1
80
  }
 
 
 
 
 
 
81
  }
82
  ],
83
  "special_tokens": {
 
 
 
 
 
 
 
 
 
84
  "<s>": {
85
  "id": "<s>",
86
  "ids": [
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 1024,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
57
  "id": "A",
58
  "type_id": 0
59
  }
60
+ },
61
+ {
62
+ "SpecialToken": {
63
+ "id": "</s>",
64
+ "type_id": 0
65
+ }
66
  }
67
  ],
68
  "pair": [
 
78
  "type_id": 0
79
  }
80
  },
81
+ {
82
+ "SpecialToken": {
83
+ "id": "</s>",
84
+ "type_id": 0
85
+ }
86
+ },
87
  {
88
  "SpecialToken": {
89
  "id": "<s>",
 
95
  "id": "B",
96
  "type_id": 1
97
  }
98
+ },
99
+ {
100
+ "SpecialToken": {
101
+ "id": "</s>",
102
+ "type_id": 1
103
+ }
104
  }
105
  ],
106
  "special_tokens": {
107
+ "</s>": {
108
+ "id": "</s>",
109
+ "ids": [
110
+ 2
111
+ ],
112
+ "tokens": [
113
+ "</s>"
114
+ ]
115
+ },
116
  "<s>": {
117
  "id": "<s>",
118
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {